From 026fcb6336d6e2917ec375964a36c72183d8fd21 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 11 Oct 2022 09:01:47 +0200 Subject: [PATCH 0001/1477] ALSA: oss: Fix potential deadlock at unregistration commit 97d917879d7f92df09c3f21fd54609a8bcd654b2 upstream. We took sound_oss_mutex around the calls of unregister_sound_special() at unregistering OSS devices. This may, however, lead to a deadlock, because we manage the card release via the card's device object, and the release may happen at unregister_sound_special() call -- which will take sound_oss_mutex again in turn. Although the deadlock might be fixed by relaxing the rawmidi mutex in the previous commit, it's safer to move unregister_sound_special() calls themselves out of the sound_oss_mutex, too. The call is race-safe as the function has a spinlock protection by itself. Link: https://lore.kernel.org/r/CAB7eexJP7w1B0mVgDF0dQ+gWor7UdkiwPczmL7pn91xx8xpzOA@mail.gmail.com Cc: Link: https://lore.kernel.org/r/20221011070147.7611-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/sound_oss.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sound/core/sound_oss.c b/sound/core/sound_oss.c index 610f317bea9d..99874e80b682 100644 --- a/sound/core/sound_oss.c +++ b/sound/core/sound_oss.c @@ -162,7 +162,6 @@ int snd_unregister_oss_device(int type, struct snd_card *card, int dev) mutex_unlock(&sound_oss_mutex); return -ENOENT; } - unregister_sound_special(minor); switch (SNDRV_MINOR_OSS_DEVICE(minor)) { case SNDRV_MINOR_OSS_PCM: track2 = SNDRV_MINOR_OSS(cidx, SNDRV_MINOR_OSS_AUDIO); @@ -174,12 +173,18 @@ int snd_unregister_oss_device(int type, struct snd_card *card, int dev) track2 = SNDRV_MINOR_OSS(cidx, SNDRV_MINOR_OSS_DMMIDI1); break; } - if (track2 >= 0) { - unregister_sound_special(track2); + if (track2 >= 0) snd_oss_minors[track2] = NULL; - } snd_oss_minors[minor] = NULL; mutex_unlock(&sound_oss_mutex); + + /* call 
unregister_sound_special() outside sound_oss_mutex; + * otherwise may deadlock, as it can trigger the release of a card + */ + unregister_sound_special(minor); + if (track2 >= 0) + unregister_sound_special(track2); + kfree(mptr); return 0; } From ef1658bc482c4ea8d5c55735fd8f4558d2bb5948 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 11 Oct 2022 09:01:46 +0200 Subject: [PATCH 0002/1477] ALSA: rawmidi: Drop register_mutex in snd_rawmidi_free() commit a70aef7982b012e86dfd39fbb235e76a21ae778a upstream. The register_mutex taken around the dev_unregister callback call in snd_rawmidi_free() may potentially lead to a mutex deadlock, when OSS emulation and a hot unplug are involved. Since the mutex doesn't protect the actual race (as the registration itself is already protected by another means), let's drop it. Link: https://lore.kernel.org/r/CAB7eexJP7w1B0mVgDF0dQ+gWor7UdkiwPczmL7pn91xx8xpzOA@mail.gmail.com Cc: Link: https://lore.kernel.org/r/20221011070147.7611-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/rawmidi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 257ad5206240..0d91143eb464 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -1736,10 +1736,8 @@ static int snd_rawmidi_free(struct snd_rawmidi *rmidi) snd_info_free_entry(rmidi->proc_entry); rmidi->proc_entry = NULL; - mutex_lock(&register_mutex); if (rmidi->ops && rmidi->ops->dev_unregister) rmidi->ops->dev_unregister(rmidi); - mutex_unlock(&register_mutex); snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT]); snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT]); From bc1d16d282bca421c6fc31de4b8fd412010f01bd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 30 Sep 2022 12:01:51 +0200 Subject: [PATCH 0003/1477] ALSA: usb-audio: Fix potential memory leaks commit 6382da0828995af87aa8b8bef28cc61aceb4aff3 upstream.
When the driver hits -ENOMEM at allocating a URB or a buffer, it aborts and goes to the error path that releases the all previously allocated resources. However, when -ENOMEM hits at the middle of the sync EP URB allocation loop, the partially allocated URBs might be left without released, because ep->nurbs is still zero at that point. Fix it by setting ep->nurbs at first, so that the error handler loops over the full URB list. Cc: Link: https://lore.kernel.org/r/20220930100151.19461-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/endpoint.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 8527267725bb..655b6d30e998 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -998,6 +998,7 @@ static int sync_ep_set_params(struct snd_usb_endpoint *ep) if (!ep->syncbuf) return -ENOMEM; + ep->nurbs = SYNC_URBS; for (i = 0; i < SYNC_URBS; i++) { struct snd_urb_ctx *u = &ep->urb[i]; u->index = i; @@ -1017,8 +1018,6 @@ static int sync_ep_set_params(struct snd_usb_endpoint *ep) u->urb->complete = snd_complete_urb; } - ep->nurbs = SYNC_URBS; - return 0; out_of_memory: From 3e29645fbaa659d50f64dcbeab0ed30df2953477 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 30 Sep 2022 12:01:29 +0200 Subject: [PATCH 0004/1477] ALSA: usb-audio: Fix NULL dererence at error path commit 568be8aaf8a535f79c4db76cabe17b035aa2584d upstream. At an error path to release URB buffers and contexts, the driver might hit a NULL dererence for u->urb pointer, when u->buffer_size has been already set but the actual URB allocation failed. Fix it by adding the NULL check of urb. Also, make sure that buffer_size is cleared after the error path or the close. Cc: Reported-by: Sabri N. 
Ferreiro Link: https://lore.kernel.org/r/CAKG+3NRjTey+fFfUEGwuxL-pi_=T4cUskYG9OzpzHytF+tzYng@mail.gmail.com Link: https://lore.kernel.org/r/20220930100129.19445-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/endpoint.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 655b6d30e998..80dcac5abe0c 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -73,12 +73,13 @@ static inline unsigned get_usb_high_speed_rate(unsigned int rate) */ static void release_urb_ctx(struct snd_urb_ctx *u) { - if (u->buffer_size) + if (u->urb && u->buffer_size) usb_free_coherent(u->ep->chip->dev, u->buffer_size, u->urb->transfer_buffer, u->urb->transfer_dma); usb_free_urb(u->urb); u->urb = NULL; + u->buffer_size = 0; } static const char *usb_error_string(int err) From 768cd2cd1ae631bcfb22eb2e8bc1e9d4f7b77a50 Mon Sep 17 00:00:00 2001 From: Callum Osmotherly Date: Wed, 5 Oct 2022 17:44:16 +1030 Subject: [PATCH 0005/1477] ALSA: hda/realtek: remove ALC289_FIXUP_DUAL_SPK for Dell 5530 commit 417b9c51f59734d852e47252476fadc293ad994a upstream. After some feedback from users with Dell Precision 5530 machines, this patch reverts the previous change to add ALC289_FIXUP_DUAL_SPK. While it improved the speaker output quality, it caused the headphone jack to have an audible "pop" sound when power saving was toggled. 
Fixes: 1885ff13d4c4 ("ALSA: hda/realtek: Enable 4-speaker output Dell Precision 5530 laptop") Signed-off-by: Callum Osmotherly Cc: Link: https://lore.kernel.org/r/Yz0uyN1zwZhnyRD6@piranha Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 574fe798d512..10225c23b467 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8774,7 +8774,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0872, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB), - SND_PCI_QUIRK(0x1028, 0x087d, "Dell Precision 5530", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x08ad, "Dell WYSE AIO", ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x08ae, "Dell WYSE NB", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), From 4285d06d1296755b28d5ac617b6319313b96f11f Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Mon, 10 Oct 2022 19:57:02 +1300 Subject: [PATCH 0006/1477] ALSA: hda/realtek: Correct pin configs for ASUS G533Z commit 66ba7c88507344dee68ad1acbdb630473ab36114 upstream. The initial fix for ASUS G533Z was based on faulty information. This fixes the pincfg to values that have been verified with no existing module options or other hacks enabled. Enables headphone jack, and 5.1 surround. [ corrected the indent level by tiwai ] Fixes: bc2c23549ccd ("ALSA: hda/realtek: Add pincfg for ASUS G533Z HP jack") Signed-off-by: Luke D. 
Jones Cc: Link: https://lore.kernel.org/r/20221010065702.35190-1-luke@ljones.dev Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 10225c23b467..20fd1608622e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8164,11 +8164,13 @@ static const struct hda_fixup alc269_fixups[] = { [ALC285_FIXUP_ASUS_G533Z_PINS] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { - { 0x14, 0x90170120 }, + { 0x14, 0x90170152 }, /* Speaker Surround Playback Switch */ + { 0x19, 0x03a19020 }, /* Mic Boost Volume */ + { 0x1a, 0x03a11c30 }, /* Mic Boost Volume */ + { 0x1e, 0x90170151 }, /* Rear jack, IN OUT EAPD Detect */ + { 0x21, 0x03211420 }, { } }, - .chained = true, - .chain_id = ALC294_FIXUP_ASUS_G513_PINS, }, [ALC294_FIXUP_ASUS_COEF_1B] = { .type = HDA_FIXUP_VERBS, From 4491fbd0a79c1aaf22c942e9de156f62b471ea24 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Mon, 10 Oct 2022 20:03:47 +1300 Subject: [PATCH 0007/1477] ALSA: hda/realtek: Add quirk for ASUS GV601R laptop commit 2ea8e1297801f7b0220ebf6ae61a5b74ca83981e upstream. The ASUS ROG X16 (GV601R) series laptop has the same node-to-DAC pairs as early models and the G14, this includes bass speakers which are by default mapped incorrectly to the 0x06 node. Add a quirk to use the same DAC pairs as the G14. Signed-off-by: Luke D. 
Jones Cc: Link: https://lore.kernel.org/r/20221010070347.36883-1-luke@ljones.dev Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 20fd1608622e..eec4928591ba 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8964,6 +8964,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), From e8eb44eeee59041ab3b18060f2ff2648f52f932b Mon Sep 17 00:00:00 2001 From: Saranya Gopal Date: Tue, 11 Oct 2022 10:19:16 +0530 Subject: [PATCH 0008/1477] ALSA: hda/realtek: Add Intel Reference SSID to support headset keys commit 4f2e56a59b9947b3e698d3cabcb858765c12b1e8 upstream. This patch fixes the issue with 3.5mm headset keys on RPL-P platform. 
[ Rearranged the entry in SSID order by tiwai ] Signed-off-by: Saranya Gopal Signed-off-by: Ninad Naik Cc: Link: https://lore.kernel.org/r/20221011044916.2278867-1-saranya.gopal@intel.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index eec4928591ba..60e3bc124836 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8986,6 +8986,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE), SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE), SND_PCI_QUIRK(0x10ec, 0x1230, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x10ec, 0x124c, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), From 50d3d895375cfce305850602ba21431a0e7bb4e5 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 28 Jul 2022 10:40:14 +0300 Subject: [PATCH 0009/1477] mtd: rawnand: atmel: Unmap streaming DMA mappings commit 1161703c9bd664da5e3b2eb1a3bb40c210e026ea upstream. Every dma_map_single() call should have its dma_unmap_single() counterpart, because the DMA address space is a shared resource and one could render the machine unusable by consuming all DMA addresses. 
Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Cc: stable@vger.kernel.org Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver") Signed-off-by: Tudor Ambarus Acked-by: Alexander Dahl Reported-by: Peter Rosin Tested-by: Alexander Dahl Reviewed-by: Boris Brezillon Tested-by: Peter Rosin Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220728074014.145406-1-tudor.ambarus@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/atmel/nand-controller.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c index 2228c34f3dea..0d84f8156d8e 100644 --- a/drivers/mtd/nand/raw/atmel/nand-controller.c +++ b/drivers/mtd/nand/raw/atmel/nand-controller.c @@ -405,6 +405,7 @@ static int atmel_nand_dma_transfer(struct atmel_nand_controller *nc, dma_async_issue_pending(nc->dmac); wait_for_completion(&finished); + dma_unmap_single(nc->dev, buf_dma, len, dir); return 0; From 64f23e5430d35e5ab81bfd7067eca078bbd20425 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 20 Sep 2022 14:32:02 +1000 Subject: [PATCH 0010/1477] cifs: destage dirty pages before re-reading them for cache=none commit bb44c31cdcac107344dd2fcc3bd0504a53575c51 upstream. This is the opposite case of kernel bugzilla 216301. If we mmap a file using cache=none and then proceed to update the mmapped area these updates are not reflected in a later pread() of that part of the file. To fix this we must first destage any dirty pages in the range before we allow the pread() to proceed. 
Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (SUSE) Reviewed-by: Enzo Matsumiya Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index fafb69d338c2..a648146e49cf 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3936,6 +3936,15 @@ static ssize_t __cifs_readv( len = ctx->len; } + if (direct) { + rc = filemap_write_and_wait_range(file->f_inode->i_mapping, + offset, offset + len - 1); + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return -EAGAIN; + } + } + /* grab a lock here due to read response handlers can access ctx */ mutex_lock(&ctx->aio_mutex); From 9312e04b6c6bc46354ecd0cc82052a2b3df0b529 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Mon, 26 Sep 2022 11:36:29 +0800 Subject: [PATCH 0011/1477] cifs: Fix the error length of VALIDATE_NEGOTIATE_INFO message commit e98ecc6e94f4e6d21c06660b0f336df02836694f upstream. Commit d5c7076b772a ("smb3: add smb3.1.1 to default dialect list") extend the dialects from 3 to 4, but forget to decrease the extended length when specific the dialect, then the message length is larger than expected. This maybe leak some info through network because not initialize the message body. After apply this patch, the VALIDATE_NEGOTIATE_INFO message length is reduced from 28 bytes to 26 bytes. 
Fixes: d5c7076b772a ("smb3: add smb3.1.1 to default dialect list") Signed-off-by: Zhang Xiaoxu Cc: Acked-by: Paulo Alcantara (SUSE) Reviewed-by: Tom Talpey Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 7ee8abd1f79b..4232dc80deeb 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1075,9 +1075,9 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) pneg_inbuf->Dialects[0] = cpu_to_le16(server->vals->protocol_id); pneg_inbuf->DialectCount = cpu_to_le16(1); - /* structure is big enough for 3 dialects, sending only 1 */ + /* structure is big enough for 4 dialects, sending only 1 */ inbuflen = sizeof(*pneg_inbuf) - - sizeof(pneg_inbuf->Dialects[0]) * 2; + sizeof(pneg_inbuf->Dialects[0]) * 3; } rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, From 30e1bd0d3e66d06117110fdffafe11a74c6f13a4 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Tue, 13 Sep 2022 09:34:12 +0200 Subject: [PATCH 0012/1477] iio: dac: ad5593r: Fix i2c read protocol requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 558a25f903b4af6361b7fbeea08a6446a0745653 upstream. For reliable operation across the full range of supported interface rates, the AD5593R needs a STOP condition between address write, and data read (like show in the datasheet Figure 40) so in turn i2c_smbus_read_word_swapped cannot be used. While at it, a simple helper was added to make the code simpler. 
Fixes: 56ca9db862bf ("iio: dac: Add support for the AD5592R/AD5593R ADCs/DACs") Signed-off-by: Michael Hennerich Signed-off-by: Nuno Sá Cc: Link: https://lore.kernel.org/r/20220913073413.140475-2-nuno.sa@analog.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/dac/ad5593r.c | 46 +++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/iio/dac/ad5593r.c b/drivers/iio/dac/ad5593r.c index 5b4df36fdc2a..4cc855c78121 100644 --- a/drivers/iio/dac/ad5593r.c +++ b/drivers/iio/dac/ad5593r.c @@ -13,6 +13,8 @@ #include #include +#include + #define AD5593R_MODE_CONF (0 << 4) #define AD5593R_MODE_DAC_WRITE (1 << 4) #define AD5593R_MODE_ADC_READBACK (4 << 4) @@ -20,6 +22,24 @@ #define AD5593R_MODE_GPIO_READBACK (6 << 4) #define AD5593R_MODE_REG_READBACK (7 << 4) +static int ad5593r_read_word(struct i2c_client *i2c, u8 reg, u16 *value) +{ + int ret; + u8 buf[2]; + + ret = i2c_smbus_write_byte(i2c, reg); + if (ret < 0) + return ret; + + ret = i2c_master_recv(i2c, buf, sizeof(buf)); + if (ret < 0) + return ret; + + *value = get_unaligned_be16(buf); + + return 0; +} + static int ad5593r_write_dac(struct ad5592r_state *st, unsigned chan, u16 value) { struct i2c_client *i2c = to_i2c_client(st->dev); @@ -38,13 +58,7 @@ static int ad5593r_read_adc(struct ad5592r_state *st, unsigned chan, u16 *value) if (val < 0) return (int) val; - val = i2c_smbus_read_word_swapped(i2c, AD5593R_MODE_ADC_READBACK); - if (val < 0) - return (int) val; - - *value = (u16) val; - - return 0; + return ad5593r_read_word(i2c, AD5593R_MODE_ADC_READBACK, value); } static int ad5593r_reg_write(struct ad5592r_state *st, u8 reg, u16 value) @@ -58,25 +72,19 @@ static int ad5593r_reg_write(struct ad5592r_state *st, u8 reg, u16 value) static int ad5593r_reg_read(struct ad5592r_state *st, u8 reg, u16 *value) { struct i2c_client *i2c = to_i2c_client(st->dev); - s32 val; - val = i2c_smbus_read_word_swapped(i2c, 
AD5593R_MODE_REG_READBACK | reg); - if (val < 0) - return (int) val; - - *value = (u16) val; - - return 0; + return ad5593r_read_word(i2c, AD5593R_MODE_REG_READBACK | reg, value); } static int ad5593r_gpio_read(struct ad5592r_state *st, u8 *value) { struct i2c_client *i2c = to_i2c_client(st->dev); - s32 val; + u16 val; + int ret; - val = i2c_smbus_read_word_swapped(i2c, AD5593R_MODE_GPIO_READBACK); - if (val < 0) - return (int) val; + ret = ad5593r_read_word(i2c, AD5593R_MODE_GPIO_READBACK, &val); + if (ret) + return ret; *value = (u8) val; From ea4dcd3d6accf2bbe87314cad428c40ed2b31dae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 15 Aug 2022 09:16:47 +0000 Subject: [PATCH 0013/1477] iio: ltc2497: Fix reading conversion results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7f4f1096d5921f5d90547596f9ce80e0b924f887 upstream. After the result of the previous conversion is read the chip automatically starts a new conversion and doesn't accept new i2c transfers until this conversion is completed which makes the function return failure. So add an early return iff the programming of the new address isn't needed. Note this will not fix the problem in general, but all cases that are currently used. Once this changes we get the failure back, but this can be addressed when the need arises. 
Fixes: 69548b7c2c4f ("iio: adc: ltc2497: split protocol independent part in a separate module ") Reported-by: Meng Li Signed-off-by: Uwe Kleine-König Tested-by: Denys Zagorui Cc: Link: https://lore.kernel.org/r/20220815091647.1523532-1-dzagorui@cisco.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ltc2497.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/iio/adc/ltc2497.c b/drivers/iio/adc/ltc2497.c index 1adddf5a88a9..61f373fab9a1 100644 --- a/drivers/iio/adc/ltc2497.c +++ b/drivers/iio/adc/ltc2497.c @@ -41,6 +41,19 @@ static int ltc2497_result_and_measure(struct ltc2497core_driverdata *ddata, } *val = (be32_to_cpu(st->buf) >> 14) - (1 << 17); + + /* + * The part started a new conversion at the end of the above i2c + * transfer, so if the address didn't change since the last call + * everything is fine and we can return early. + * If not (which should only happen when some sort of bulk + * conversion is implemented) we have to program the new + * address. Note that this probably fails as the conversion that + * was triggered above is like not complete yet and the two + * operations have to be done in a single transfer. + */ + if (ddata->addr_prev == address) + return 0; } ret = i2c_smbus_write_byte(st->client, From ae49d80400e66505272fd2d8fcbf705002443c75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Mon, 12 Sep 2022 10:12:21 +0200 Subject: [PATCH 0014/1477] iio: adc: ad7923: fix channel readings for some variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f4f43f01cff2f29779343ade755191afd2581c77 upstream. Some of the supported devices have 4 or 2 LSB trailing bits that should not be taken into account. Hence we need to shift these bits out which fits perfectly on the scan type shift property. This change fixes both raw and buffered reads. 
Fixes: f2f7a449707e ("iio:adc:ad7923: Add support for the ad7904/ad7914/ad7924") Fixes: 851644a60d20 ("iio: adc: ad7923: Add support for the ad7908/ad7918/ad7928") Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20220912081223.173584-2-nuno.sa@analog.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ad7923.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7923.c b/drivers/iio/adc/ad7923.c index 8c1e866f72e8..96eeda433ad6 100644 --- a/drivers/iio/adc/ad7923.c +++ b/drivers/iio/adc/ad7923.c @@ -93,6 +93,7 @@ enum ad7923_id { .sign = 'u', \ .realbits = (bits), \ .storagebits = 16, \ + .shift = 12 - (bits), \ .endianness = IIO_BE, \ }, \ } @@ -274,7 +275,8 @@ static int ad7923_read_raw(struct iio_dev *indio_dev, return ret; if (chan->address == EXTRACT(ret, 12, 4)) - *val = EXTRACT(ret, 0, 12); + *val = EXTRACT(ret, chan->scan_type.shift, + chan->scan_type.realbits); else return -EIO; From 9daadd1d101596a45dce7cabdf515bc343a3c976 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Thu, 15 Sep 2022 14:57:18 -0500 Subject: [PATCH 0015/1477] iio: pressure: dps310: Refactor startup procedure commit c2329717bdd3fa62f8a2f3d8d85ad0bee4556bd7 upstream. Move the startup procedure into a function, and correct a missing check on the return code for writing the PRS_CFG register. 
Cc: Signed-off-by: Eddie James Reviewed-by: Joel Stanley Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220915195719.136812-2-eajames@linux.ibm.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/pressure/dps310.c | 188 ++++++++++++++++++---------------- 1 file changed, 99 insertions(+), 89 deletions(-) diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c index 0730380ceb69..9b787caaf67c 100644 --- a/drivers/iio/pressure/dps310.c +++ b/drivers/iio/pressure/dps310.c @@ -159,6 +159,102 @@ static int dps310_get_coefs(struct dps310_data *data) return 0; } +/* + * Some versions of the chip will read temperatures in the ~60C range when + * it's actually ~20C. This is the manufacturer recommended workaround + * to correct the issue. The registers used below are undocumented. + */ +static int dps310_temp_workaround(struct dps310_data *data) +{ + int rc; + int reg; + + rc = regmap_read(data->regmap, 0x32, &reg); + if (rc) + return rc; + + /* + * If bit 1 is set then the device is okay, and the workaround does not + * need to be applied + */ + if (reg & BIT(1)) + return 0; + + rc = regmap_write(data->regmap, 0x0e, 0xA5); + if (rc) + return rc; + + rc = regmap_write(data->regmap, 0x0f, 0x96); + if (rc) + return rc; + + rc = regmap_write(data->regmap, 0x62, 0x02); + if (rc) + return rc; + + rc = regmap_write(data->regmap, 0x0e, 0x00); + if (rc) + return rc; + + return regmap_write(data->regmap, 0x0f, 0x00); +} + +static int dps310_startup(struct dps310_data *data) +{ + int rc; + int ready; + + /* + * Set up pressure sensor in single sample, one measurement per second + * mode + */ + rc = regmap_write(data->regmap, DPS310_PRS_CFG, 0); + if (rc) + return rc; + + /* + * Set up external (MEMS) temperature sensor in single sample, one + * measurement per second mode + */ + rc = regmap_write(data->regmap, DPS310_TMP_CFG, DPS310_TMP_EXT); + if (rc) + return rc; + + /* Temp and pressure shifts are disabled when PRC
<= 8 */ + rc = regmap_write_bits(data->regmap, DPS310_CFG_REG, + DPS310_PRS_SHIFT_EN | DPS310_TMP_SHIFT_EN, 0); + if (rc) + return rc; + + /* MEAS_CFG doesn't update correctly unless first written with 0 */ + rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG, + DPS310_MEAS_CTRL_BITS, 0); + if (rc) + return rc; + + /* Turn on temperature and pressure measurement in the background */ + rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG, + DPS310_MEAS_CTRL_BITS, DPS310_PRS_EN | + DPS310_TEMP_EN | DPS310_BACKGROUND); + if (rc) + return rc; + + /* + * Calibration coefficients required for reporting temperature. + * They are available 40ms after the device has started + */ + rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready, + ready & DPS310_COEF_RDY, 10000, 40000); + if (rc) + return rc; + + rc = dps310_get_coefs(data); + if (rc) + return rc; + + return dps310_temp_workaround(data); +} + static int dps310_get_pres_precision(struct dps310_data *data) { int rc; @@ -677,52 +773,12 @@ static const struct iio_info dps310_info = { .write_raw = dps310_write_raw, }; -/* - * Some verions of chip will read temperatures in the ~60C range when - * its actually ~20C. This is the manufacturer recommended workaround - * to correct the issue. The registers used below are undocumented. 
- */ -static int dps310_temp_workaround(struct dps310_data *data) -{ - int rc; - int reg; - - rc = regmap_read(data->regmap, 0x32, &reg); - if (rc < 0) - return rc; - - /* - * If bit 1 is set then the device is okay, and the workaround does not - * need to be applied - */ - if (reg & BIT(1)) - return 0; - - rc = regmap_write(data->regmap, 0x0e, 0xA5); - if (rc < 0) - return rc; - - rc = regmap_write(data->regmap, 0x0f, 0x96); - if (rc < 0) - return rc; - - rc = regmap_write(data->regmap, 0x62, 0x02); - if (rc < 0) - return rc; - - rc = regmap_write(data->regmap, 0x0e, 0x00); - if (rc < 0) - return rc; - - return regmap_write(data->regmap, 0x0f, 0x00); -} - static int dps310_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct dps310_data *data; struct iio_dev *iio; - int rc, ready; + int rc; iio = devm_iio_device_alloc(&client->dev, sizeof(*data)); if (!iio) @@ -747,54 +803,8 @@ static int dps310_probe(struct i2c_client *client, if (rc) return rc; - /* - * Set up pressure sensor in single sample, one measurement per second - * mode - */ - rc = regmap_write(data->regmap, DPS310_PRS_CFG, 0); - - /* - * Set up external (MEMS) temperature sensor in single sample, one - * measurement per second mode - */ - rc = regmap_write(data->regmap, DPS310_TMP_CFG, DPS310_TMP_EXT); - if (rc < 0) - return rc; - - /* Temp and pressure shifts are disabled when PRC <= 8 */ - rc = regmap_write_bits(data->regmap, DPS310_CFG_REG, - DPS310_PRS_SHIFT_EN | DPS310_TMP_SHIFT_EN, 0); - if (rc < 0) - return rc; - - /* MEAS_CFG doesn't update correctly unless first written with 0 */ - rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG, - DPS310_MEAS_CTRL_BITS, 0); - if (rc < 0) - return rc; - - /* Turn on temperature and pressure measurement in the background */ - rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG, - DPS310_MEAS_CTRL_BITS, DPS310_PRS_EN | - DPS310_TEMP_EN | DPS310_BACKGROUND); - if (rc < 0) - return rc; - - /* - * Calibration coefficients required for
reporting temperature. - * They are available 40ms after the device has started - */ - rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready, - ready & DPS310_COEF_RDY, 10000, 40000); - if (rc < 0) - return rc; - - rc = dps310_get_coefs(data); - if (rc < 0) - return rc; - - rc = dps310_temp_workaround(data); - if (rc < 0) + rc = dps310_startup(data); + if (rc) return rc; rc = devm_iio_device_register(&client->dev, iio); From afbbf305dbaca2b98d7f0ae2ed4a6394645b99b7 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Thu, 15 Sep 2022 14:57:19 -0500 Subject: [PATCH 0016/1477] iio: pressure: dps310: Reset chip after timeout commit 7b4ab4abcea4c0c10b25187bf2569e5a07e9a20c upstream. The DPS310 chip has been observed to get "stuck" such that pressure and temperature measurements are never indicated as "ready" in the MEAS_CFG register. The only solution is to reset the device and try again. In order to avoid continual failures, use a boolean flag to only try the reset after timeout once if errors persist. 
Fixes: ba6ec48e76bc ("iio: Add driver for Infineon DPS310") Cc: Signed-off-by: Eddie James Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220915195719.136812-3-eajames@linux.ibm.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/pressure/dps310.c | 74 ++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c index 9b787caaf67c..cf8b92fae1b3 100644 --- a/drivers/iio/pressure/dps310.c +++ b/drivers/iio/pressure/dps310.c @@ -89,6 +89,7 @@ struct dps310_data { s32 c00, c10, c20, c30, c01, c11, c21; s32 pressure_raw; s32 temp_raw; + bool timeout_recovery_failed; }; static const struct iio_chan_spec dps310_channels[] = { @@ -393,11 +394,69 @@ static int dps310_get_temp_k(struct dps310_data *data) return scale_factors[ilog2(rc)]; } +static int dps310_reset_wait(struct dps310_data *data) +{ + int rc; + + rc = regmap_write(data->regmap, DPS310_RESET, DPS310_RESET_MAGIC); + if (rc) + return rc; + + /* Wait for device chip access: 2.5ms in specification */ + usleep_range(2500, 12000); + return 0; +} + +static int dps310_reset_reinit(struct dps310_data *data) +{ + int rc; + + rc = dps310_reset_wait(data); + if (rc) + return rc; + + return dps310_startup(data); +} + +static int dps310_ready_status(struct dps310_data *data, int ready_bit, int timeout) +{ + int sleep = DPS310_POLL_SLEEP_US(timeout); + int ready; + + return regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready, ready & ready_bit, + sleep, timeout); +} + +static int dps310_ready(struct dps310_data *data, int ready_bit, int timeout) +{ + int rc; + + rc = dps310_ready_status(data, ready_bit, timeout); + if (rc) { + if (rc == -ETIMEDOUT && !data->timeout_recovery_failed) { + /* Reset and reinitialize the chip. */ + if (dps310_reset_reinit(data)) { + data->timeout_recovery_failed = true; + } else { + /* Try again to get sensor ready status. 
*/ + if (dps310_ready_status(data, ready_bit, timeout)) + data->timeout_recovery_failed = true; + else + return 0; + } + } + + return rc; + } + + data->timeout_recovery_failed = false; + return 0; +} + static int dps310_read_pres_raw(struct dps310_data *data) { int rc; int rate; - int ready; int timeout; s32 raw; u8 val[3]; @@ -409,9 +468,7 @@ static int dps310_read_pres_raw(struct dps310_data *data) timeout = DPS310_POLL_TIMEOUT_US(rate); /* Poll for sensor readiness; base the timeout upon the sample rate. */ - rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready, - ready & DPS310_PRS_RDY, - DPS310_POLL_SLEEP_US(timeout), timeout); + rc = dps310_ready(data, DPS310_PRS_RDY, timeout); if (rc) goto done; @@ -448,7 +505,6 @@ static int dps310_read_temp_raw(struct dps310_data *data) { int rc; int rate; - int ready; int timeout; if (mutex_lock_interruptible(&data->lock)) @@ -458,10 +514,8 @@ static int dps310_read_temp_raw(struct dps310_data *data) timeout = DPS310_POLL_TIMEOUT_US(rate); /* Poll for sensor readiness; base the timeout upon the sample rate. */ - rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready, - ready & DPS310_TMP_RDY, - DPS310_POLL_SLEEP_US(timeout), timeout); - if (rc < 0) + rc = dps310_ready(data, DPS310_TMP_RDY, timeout); + if (rc) goto done; rc = dps310_read_temp_ready(data); @@ -756,7 +810,7 @@ static void dps310_reset(void *action_data) { struct dps310_data *data = action_data; - regmap_write(data->regmap, DPS310_RESET, DPS310_RESET_MAGIC); + dps310_reset_wait(data); } static const struct regmap_config dps310_regmap_config = { From b239a0993aa2d603c11bd94fa4a93670fb117d45 Mon Sep 17 00:00:00 2001 From: Jean-Francois Le Fillatre Date: Tue, 27 Sep 2022 09:34:07 +0200 Subject: [PATCH 0017/1477] usb: add quirks for Lenovo OneLink+ Dock commit 37d49519b41405b08748392c6a7f193d9f77ecd2 upstream. 
The Lenovo OneLink+ Dock contains two VL812 USB3.0 controllers: 17ef:1018 upstream 17ef:1019 downstream These hubs suffer from two separate problems: 1) After the host system was suspended and woken up, the hubs appear to be in a random state. Some downstream ports (both internal to the built-in audio and network controllers, and external to USB sockets) may no longer be functional. The exact list of disabled ports (if any) changes from wakeup to wakeup. Ports remain in that state until the dock is power-cycled, or until the laptop is rebooted. Wakeup sources connected to the hubs (keyboard, WoL on the integrated gigabit controller) will wake the system up from suspend, but they may no longer work after wakeup (and in that case will no longer work as wakeup source in a subsequent suspend-wakeup cycle). This issue appears in the logs with messages such as: usb 1-6.1-port4: cannot disable (err = -71) usb 1-6-port2: cannot disable (err = -71) usb 1-6.1: clear tt 1 (80c0) error -71 usb 1-6-port4: cannot disable (err = -71) usb 1-6.4: PM: dpm_run_callback(): usb_dev_resume+0x0/0x10 [usbcore] returns -71 usb 1-6.4: PM: failed to resume async: error -71 usb 1-7: reset full-speed USB device number 5 using xhci_hcd usb 1-6.1-port1: cannot reset (err = -71) usb 1-6.1-port1: cannot reset (err = -71) usb 1-6.1-port1: cannot reset (err = -71) usb 1-6.1-port1: cannot reset (err = -71) usb 1-6.1-port1: cannot reset (err = -71) usb 1-6.1-port1: Cannot enable. Maybe the USB cable is bad? usb 1-6.1-port1: cannot disable (err = -71) usb 1-6.1-port1: cannot reset (err = -71) usb 1-6.1-port1: cannot reset (err = -71) usb 1-6.1-port1: cannot reset (err = -71) usb 1-6.1-port1: cannot reset (err = -71) usb 1-6.1-port1: cannot reset (err = -71) usb 1-6.1-port1: Cannot enable. Maybe the USB cable is bad? usb 1-6.1-port1: cannot disable (err = -71) 2) Some USB devices cannot be enumerated properly. So far I have only seen the issue with USB 3.0 devices. 
The same devices work without problem directly connected to the host system, to other systems or to other hubs (even when those hubs are connected to the OneLink+ dock). One very reliable reproducer is this USB 3.0 HDD enclosure: 152d:9561 JMicron Technology Corp. / JMicron USA Technology Corp. Mobius I have seen it happen sporadically with other USB 3.0 enclosures, with controllers from different manufacturers, all self-powered. Typical messages in the logs: xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command usb 2-1.4: device not accepting address 6, error -62 xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command usb 2-1.4: device not accepting address 7, error -62 usb 2-1-port4: attempt power cycle xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command usb 2-1.4: device not accepting address 8, error -62 xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command usb 2-1.4: device not accepting address 9, error -62 usb 2-1-port4: unable to enumerate USB device Through trial and error, I found that the USB_QUIRK_RESET_RESUME solved the second issue. Further testing then uncovered the first issue. 
Test results are summarized in this table: ======================================================================================= Settings USB2 hotplug USB3 hotplug State after waking up --------------------------------------------------------------------------------------- power/control=auto works fails broken usbcore.autosuspend=-1 works works broken OR power/control=on power/control=auto works (1) works (1) works and USB_QUIRK_RESET_RESUME power/control=on works works works and USB_QUIRK_RESET_RESUME HUB_QUIRK_DISABLE_AUTOSUSPEND works works works and USB_QUIRK_RESET_RESUME ======================================================================================= In those results, the power/control settings are applied to both hubs, both on the USB2 and USB3 side, before each test. From those results, USB_QUIRK_RESET_RESUME is required to reset the hubs properly after a suspend-wakeup cycle, and the hubs must not autosuspend to work around the USB3 issue. A secondary effect of USB_QUIRK_RESET_RESUME is to prevent the hubs' upstream links from suspending (the downstream ports can still suspend). This secondary effect is used in results (1). It is enough to solve the USB3 problem. Setting USB_QUIRK_RESET_RESUME on those hubs is the smallest patch that solves both issues. Prior to creating this patch, I have used the USB_QUIRK_RESET_RESUME via the kernel command line for over a year without noticing any side effect. Thanks to Oliver Neukum @Suse for explanations of the operations of USB_QUIRK_RESET_RESUME, and requesting more testing. 
Signed-off-by: Jean-Francois Le Fillatre Cc: stable Link: https://lore.kernel.org/r/20220927073407.5672-1-jflf_kernel@gmx.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f03ee889ecc7..03473e20e218 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -438,6 +438,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo ThinkPad OneLink+ Dock twin hub controllers (VIA Labs VL812) */ + { USB_DEVICE(0x17ef, 0x1018), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x17ef, 0x1019), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Lenovo USB-C to Ethernet Adapter RTL8153-04 */ { USB_DEVICE(0x17ef, 0x720c), .driver_info = USB_QUIRK_NO_LPM }, From 5d1cb7bfad21f4ec315cbe659a6fbd8ded5356e8 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 10 Oct 2022 17:08:27 +0200 Subject: [PATCH 0018/1477] can: kvaser_usb: Fix use of uninitialized completion commit cd7f30e174d09a02ca2afa5ef093fb0f0352e0d8 upstream. flush_comp is initialized when CMD_FLUSH_QUEUE is sent to the device and completed when the device sends CMD_FLUSH_QUEUE_RESP. This causes completion of uninitialized completion if the device sends CMD_FLUSH_QUEUE_RESP before CMD_FLUSH_QUEUE is ever sent (e.g. as a response to a flush by a previously bound driver, or a misbehaving device). Fix that by initializing flush_comp in kvaser_usb_init_one() like the other completions. This issue is only triggerable after RX URBs have been set up, i.e. the interface has been opened at least once. 
Cc: stable@vger.kernel.org Fixes: aec5fb2268b7 ("can: kvaser_usb: Add support for Kvaser USB hydra family") Tested-by: Jimmy Assarsson Signed-off-by: Anssi Hannula Signed-off-by: Jimmy Assarsson Link: https://lore.kernel.org/all/20221010150829.199676-3-extja@kvaser.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c | 1 + drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 416763fd1f11..38fb3decb126 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -690,6 +690,7 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev, int channel) init_usb_anchor(&priv->tx_submitted); init_completion(&priv->start_comp); init_completion(&priv->stop_comp); + init_completion(&priv->flush_comp); priv->can.ctrlmode_supported = 0; priv->dev = dev; diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index 01d4a731b579..5d642458bac5 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -1886,7 +1886,7 @@ static int kvaser_usb_hydra_flush_queue(struct kvaser_usb_net_priv *priv) { int err; - init_completion(&priv->flush_comp); + reinit_completion(&priv->flush_comp); err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_FLUSH_QUEUE, priv->channel); From 0f8c88978da4ec7ed12354f357fe646ee4a9058e Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 10 Oct 2022 17:08:26 +0200 Subject: [PATCH 0019/1477] can: kvaser_usb_leaf: Fix overread with an invalid command commit 1499ecaea9d2ba68d5e18d80573b4561a8dc4ee7 upstream. 
For command events read from the device, kvaser_usb_leaf_read_bulk_callback() verifies that cmd->len does not exceed the size of the received data, but the actual kvaser_cmd handlers will happily read any kvaser_cmd fields without checking for cmd->len. This can cause an overread if the last cmd in the buffer is shorter than expected for the command type (with cmd->len showing the actual short size). Maximum overread seems to be 22 bytes (CMD_LEAF_LOG_MESSAGE), some of which are delivered to userspace as-is. Fix that by verifying the length of command before handling it. This issue can only occur after RX URBs have been set up, i.e. the interface has been opened at least once. Cc: stable@vger.kernel.org Fixes: 080f40a6fa28 ("can: kvaser_usb: Add support for Kvaser CAN/USB devices") Tested-by: Jimmy Assarsson Signed-off-by: Anssi Hannula Signed-off-by: Jimmy Assarsson Link: https://lore.kernel.org/all/20221010150829.199676-2-extja@kvaser.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- .../net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index 5e281249ad5f..5ee7d5c00986 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -309,6 +309,38 @@ struct kvaser_cmd { } u; } __packed; +#define CMD_SIZE_ANY 0xff +#define kvaser_fsize(field) sizeof_field(struct kvaser_cmd, field) + +static const u8 kvaser_usb_leaf_cmd_sizes_leaf[] = { + [CMD_START_CHIP_REPLY] = kvaser_fsize(u.simple), + [CMD_STOP_CHIP_REPLY] = kvaser_fsize(u.simple), + [CMD_GET_CARD_INFO_REPLY] = kvaser_fsize(u.cardinfo), + [CMD_TX_ACKNOWLEDGE] = kvaser_fsize(u.tx_acknowledge_header), + [CMD_GET_SOFTWARE_INFO_REPLY] = kvaser_fsize(u.leaf.softinfo), + [CMD_RX_STD_MESSAGE] = kvaser_fsize(u.leaf.rx_can), + [CMD_RX_EXT_MESSAGE] = kvaser_fsize(u.leaf.rx_can), + 
[CMD_LEAF_LOG_MESSAGE] = kvaser_fsize(u.leaf.log_message), + [CMD_CHIP_STATE_EVENT] = kvaser_fsize(u.leaf.chip_state_event), + [CMD_CAN_ERROR_EVENT] = kvaser_fsize(u.leaf.error_event), + /* ignored events: */ + [CMD_FLUSH_QUEUE_REPLY] = CMD_SIZE_ANY, +}; + +static const u8 kvaser_usb_leaf_cmd_sizes_usbcan[] = { + [CMD_START_CHIP_REPLY] = kvaser_fsize(u.simple), + [CMD_STOP_CHIP_REPLY] = kvaser_fsize(u.simple), + [CMD_GET_CARD_INFO_REPLY] = kvaser_fsize(u.cardinfo), + [CMD_TX_ACKNOWLEDGE] = kvaser_fsize(u.tx_acknowledge_header), + [CMD_GET_SOFTWARE_INFO_REPLY] = kvaser_fsize(u.usbcan.softinfo), + [CMD_RX_STD_MESSAGE] = kvaser_fsize(u.usbcan.rx_can), + [CMD_RX_EXT_MESSAGE] = kvaser_fsize(u.usbcan.rx_can), + [CMD_CHIP_STATE_EVENT] = kvaser_fsize(u.usbcan.chip_state_event), + [CMD_CAN_ERROR_EVENT] = kvaser_fsize(u.usbcan.error_event), + /* ignored events: */ + [CMD_USBCAN_CLOCK_OVERFLOW_EVENT] = CMD_SIZE_ANY, +}; + /* Summary of a kvaser error event, for a unified Leaf/Usbcan error * handling. 
Some discrepancies between the two families exist: * @@ -396,6 +428,43 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_32mhz = { .bittiming_const = &kvaser_usb_flexc_bittiming_const, }; +static int kvaser_usb_leaf_verify_size(const struct kvaser_usb *dev, + const struct kvaser_cmd *cmd) +{ + /* buffer size >= cmd->len ensured by caller */ + u8 min_size = 0; + + switch (dev->driver_info->family) { + case KVASER_LEAF: + if (cmd->id < ARRAY_SIZE(kvaser_usb_leaf_cmd_sizes_leaf)) + min_size = kvaser_usb_leaf_cmd_sizes_leaf[cmd->id]; + break; + case KVASER_USBCAN: + if (cmd->id < ARRAY_SIZE(kvaser_usb_leaf_cmd_sizes_usbcan)) + min_size = kvaser_usb_leaf_cmd_sizes_usbcan[cmd->id]; + break; + } + + if (min_size == CMD_SIZE_ANY) + return 0; + + if (min_size) { + min_size += CMD_HEADER_LEN; + if (cmd->len >= min_size) + return 0; + + dev_err_ratelimited(&dev->intf->dev, + "Received command %u too short (size %u, needed %u)", + cmd->id, cmd->len, min_size); + return -EIO; + } + + dev_warn_ratelimited(&dev->intf->dev, + "Unhandled command (%d, size %d)\n", + cmd->id, cmd->len); + return -EINVAL; +} + static void * kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, const struct sk_buff *skb, int *frame_len, @@ -503,6 +572,9 @@ static int kvaser_usb_leaf_wait_cmd(const struct kvaser_usb *dev, u8 id, end: kfree(buf); + if (err == 0) + err = kvaser_usb_leaf_verify_size(dev, cmd); + return err; } @@ -1137,6 +1209,9 @@ static void kvaser_usb_leaf_stop_chip_reply(const struct kvaser_usb *dev, static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { + if (kvaser_usb_leaf_verify_size(dev, cmd) < 0) + return; + switch (cmd->id) { case CMD_START_CHIP_REPLY: kvaser_usb_leaf_start_chip_reply(dev, cmd); From a3776e09b361aee925e405ca8fdfe04f83c1d107 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 10 Oct 2022 17:08:28 +0200 Subject: [PATCH 0020/1477] can: kvaser_usb_leaf: Fix TX queue out of sync 
after restart commit 455561fb618fde40558776b5b8435f9420f335db upstream. The TX queue seems to be implicitly flushed by the hardware during bus-off or bus-off recovery, but the driver does not reset the TX bookkeeping. Despite not resetting TX bookkeeping the driver still re-enables TX queue unconditionally, leading to "cannot find free context" / NETDEV_TX_BUSY errors if the TX queue was full at bus-off time. Fix that by resetting TX bookkeeping on CAN restart. Tested with 0bfd:0124 Kvaser Mini PCI Express 2xHS FW 4.18.778. Cc: stable@vger.kernel.org Fixes: 080f40a6fa28 ("can: kvaser_usb: Add support for Kvaser CAN/USB devices") Tested-by: Jimmy Assarsson Signed-off-by: Anssi Hannula Signed-off-by: Jimmy Assarsson Link: https://lore.kernel.org/all/20221010150829.199676-4-extja@kvaser.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb/kvaser_usb.h | 2 ++ drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c | 2 +- drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h index 61e67986b625..62958f04a2f2 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h @@ -178,6 +178,8 @@ struct kvaser_usb_dev_cfg { extern const struct kvaser_usb_dev_ops kvaser_usb_hydra_dev_ops; extern const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops; +void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv); + int kvaser_usb_recv_cmd(const struct kvaser_usb *dev, void *cmd, int len, int *actual_len); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 38fb3decb126..7491f85e85b3 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -453,7 +453,7 @@ static void kvaser_usb_reset_tx_urb_contexts(struct 
kvaser_usb_net_priv *priv) /* This method might sleep. Do not call it in the atomic context * of URB completions. */ -static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv) +void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv) { usb_kill_anchored_urbs(&priv->tx_submitted); kvaser_usb_reset_tx_urb_contexts(priv); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index 5ee7d5c00986..e7970be6f14b 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -1430,6 +1430,8 @@ static int kvaser_usb_leaf_set_mode(struct net_device *netdev, switch (mode) { case CAN_MODE_START: + kvaser_usb_unlink_tx_urbs(priv); + err = kvaser_usb_leaf_simple_cmd_async(priv, CMD_START_CHIP); if (err) return err; From 448fffc1aea6cc339f27ae25139a4f655c6dd020 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 10 Oct 2022 17:08:29 +0200 Subject: [PATCH 0021/1477] can: kvaser_usb_leaf: Fix CAN state after restart commit 0be1a655fe68c8e6dcadbcbddb69cf2fb29881f5 upstream. can_restart() expects CMD_START_CHIP to set the error state to ERROR_ACTIVE as it calls netif_carrier_on() immediately afterwards. Otherwise the user may immediately trigger restart again and hit a BUG_ON() in can_restart(). Fix kvaser_usb_leaf set_mode(CMD_START_CHIP) to set the expected state. 
Cc: stable@vger.kernel.org Fixes: 080f40a6fa28 ("can: kvaser_usb: Add support for Kvaser CAN/USB devices") Tested-by: Jimmy Assarsson Signed-off-by: Anssi Hannula Signed-off-by: Jimmy Assarsson Link: https://lore.kernel.org/all/20221010150829.199676-5-extja@kvaser.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index e7970be6f14b..78d52a5e8fd5 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -1435,6 +1435,8 @@ static int kvaser_usb_leaf_set_mode(struct net_device *netdev, err = kvaser_usb_leaf_simple_cmd_async(priv, CMD_START_CHIP); if (err) return err; + + priv->can.state = CAN_STATE_ERROR_ACTIVE; break; default: return -EOPNOTSUPP; From 057d5838c795cccde749536daad61f0db251d697 Mon Sep 17 00:00:00 2001 From: Wenchao Chen Date: Tue, 11 Oct 2022 18:49:35 +0800 Subject: [PATCH 0022/1477] mmc: sdhci-sprd: Fix minimum clock limit commit 6e141772e6465f937458b35ddcfd0a981b6f5280 upstream. The Spreadtrum controller supports 100KHz minimal clock rate, which means that the current value 400KHz is wrong. Unfortunately this has also led to failures to initialize some cards, which are allowed to require 100KHz to work. So, let's fix the problem by changing the minimal supported clock rate to 100KHz.
Signed-off-by: Wenchao Chen Acked-by: Adrian Hunter Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221011104935.10980-1-wenchao.chen666@gmail.com [Ulf: Clarified to commit-message] Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-sprd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 9cd8862e6cbd..8575f4537e57 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -296,7 +296,7 @@ static unsigned int sdhci_sprd_get_max_clock(struct sdhci_host *host) static unsigned int sdhci_sprd_get_min_clock(struct sdhci_host *host) { - return 400000; + return 100000; } static void sdhci_sprd_set_uhs_signaling(struct sdhci_host *host, From 0b2d8e4db40c44d4bfe7c8819888f6299f7c0a54 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 15 Aug 2022 15:43:14 -0400 Subject: [PATCH 0023/1477] fs: dlm: fix race between test_bit() and queue_work() commit eef6ec9bf390e836a6c4029f3620fe49528aa1fe upstream. This patch fixes a race by using ls_cb_mutex around the bit operations and conditional code blocks for LSFL_CB_DELAY. The function dlm_callback_stop() expects to stop all callbacks and flush all currently queued ones. The set_bit() is not enough because there can still be queue_work() after the workqueue was flushed. To avoid queue_work() after set_bit(), surround both by ls_cb_mutex.
Cc: stable@vger.kernel.org Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/ast.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 283c7b94edda..ca06069e95c8 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -198,13 +198,13 @@ void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, if (!prev_seq) { kref_get(&lkb->lkb_ref); + mutex_lock(&ls->ls_cb_mutex); if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) { - mutex_lock(&ls->ls_cb_mutex); list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay); - mutex_unlock(&ls->ls_cb_mutex); } else { queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work); } + mutex_unlock(&ls->ls_cb_mutex); } out: mutex_unlock(&lkb->lkb_cb_mutex); @@ -284,7 +284,9 @@ void dlm_callback_stop(struct dlm_ls *ls) void dlm_callback_suspend(struct dlm_ls *ls) { + mutex_lock(&ls->ls_cb_mutex); set_bit(LSFL_CB_DELAY, &ls->ls_flags); + mutex_unlock(&ls->ls_cb_mutex); if (ls->ls_callback_wq) flush_workqueue(ls->ls_callback_wq); From bfe60d7641b024142be238f3d584ca6068d9d809 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 15 Aug 2022 15:43:15 -0400 Subject: [PATCH 0024/1477] fs: dlm: handle -EBUSY first in lock arg validation commit 44637ca41d551d409a481117b07fa209b330fca9 upstream. During lock arg validation, first check for -EBUSY cases, then for -EINVAL cases. The -EINVAL checks look at lkb state variables which are not stable when an lkb is busy and would cause an -EBUSY result, e.g. lkb->lkb_grmode. 
Cc: stable@vger.kernel.org Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/lock.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index eaa28d654e9f..dde9afb6747b 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -2888,17 +2888,9 @@ static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args) static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_args *args) { - int rv = -EINVAL; + int rv = -EBUSY; if (args->flags & DLM_LKF_CONVERT) { - if (lkb->lkb_flags & DLM_IFL_MSTCPY) - goto out; - - if (args->flags & DLM_LKF_QUECVT && - !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1]) - goto out; - - rv = -EBUSY; if (lkb->lkb_status != DLM_LKSTS_GRANTED) goto out; @@ -2907,6 +2899,14 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, if (is_overlap(lkb)) goto out; + + rv = -EINVAL; + if (lkb->lkb_flags & DLM_IFL_MSTCPY) + goto out; + + if (args->flags & DLM_LKF_QUECVT && + !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1]) + goto out; } lkb->lkb_exflags = args->flags; From 5b1a56beb6b8069422a10bb57f3abfa4f4871bc4 Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Wed, 7 Sep 2022 15:01:59 +0000 Subject: [PATCH 0025/1477] HID: multitouch: Add memory barriers commit be6e2b5734a425941fcdcdbd2a9337be498ce2cf upstream. This fixes broken atomic checks which cause a race between the release-timer and processing of hid input. I noticed that contacts were sometimes sticking, even with the "sticky fingers" quirk enabled. This fixes that problem. 
Cc: stable@vger.kernel.org Fixes: 9609827458c3 ("HID: multitouch: optimize the sticky fingers timer") Signed-off-by: Andri Yngvason Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20220907150159.2285460-1-andri@yngvason.is Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-multitouch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index d686917cc3b1..a78ce16d4782 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1155,7 +1155,7 @@ static void mt_touch_report(struct hid_device *hid, int contact_count = -1; /* sticky fingers release in progress, abort */ - if (test_and_set_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags)) + if (test_and_set_bit_lock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags)) return; scantime = *app->scantime; @@ -1236,7 +1236,7 @@ static void mt_touch_report(struct hid_device *hid, del_timer(&td->release_timer); } - clear_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); + clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); } static int mt_touch_input_configured(struct hid_device *hdev, @@ -1671,11 +1671,11 @@ static void mt_expired_timeout(struct timer_list *t) * An input report came in just before we release the sticky fingers, * it will take care of the sticky fingers. 
*/ - if (test_and_set_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags)) + if (test_and_set_bit_lock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags)) return; if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags)) mt_release_contacts(hdev); - clear_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); + clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); } static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) From 6b7ae4a904a4dd83cae245c02db88c8b5d49e1cb Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Fri, 23 Sep 2022 21:45:52 +0800 Subject: [PATCH 0026/1477] quota: Check next/prev free block number after reading from quota file commit 6c8ea8b8cd4722efd419f91ca46a2dc81b7d89a3 upstream. Following process: Init: v2_read_file_info: <3> dqi_free_blk 0 dqi_free_entry 5 dqi_blks 6 Step 1. chown bin f_a -> dquot_acquire -> v2_write_dquot: qtree_write_dquot do_insert_tree find_free_dqentry get_free_dqblk write_blk(info->dqi_blocks) // info->dqi_blocks = 6, failure. The content in physical block (corresponding to blk 6) is random. Step 2. chown root f_a -> dquot_transfer -> dqput_all -> dqput -> ext4_release_dquot -> v2_release_dquot -> qtree_delete_dquot: dquot_release remove_tree free_dqentry put_free_dqblk(6) info->dqi_free_blk = blk // info->dqi_free_blk = 6 Step 3. drop cache (buffer head for block 6 is released) Step 4. chown bin f_b -> dquot_acquire -> commit_dqblk -> v2_write_dquot: qtree_write_dquot do_insert_tree find_free_dqentry get_free_dqblk dh = (struct qt_disk_dqdbheader *)buf blk = info->dqi_free_blk // 6 ret = read_blk(info, blk, buf) // The content of buf is random info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free) // random blk Step 5. 
chown bin f_c -> notify_change -> ext4_setattr -> dquot_transfer: dquot = dqget -> acquire_dquot -> ext4_acquire_dquot -> dquot_acquire -> commit_dqblk -> v2_write_dquot -> dq_insert_tree: do_insert_tree find_free_dqentry get_free_dqblk blk = info->dqi_free_blk // If blk < 0 and blk is not an error code, it will be returned as dquot transfer_to[USRQUOTA] = dquot // A random negative value __dquot_transfer(transfer_to) dquot_add_inodes(transfer_to[cnt]) spin_lock(&dquot->dq_dqb_lock) // page fault , which will lead to kernel page fault: Quota error (device sda): qtree_write_dquot: Error -8000 occurred while creating quota BUG: unable to handle page fault for address: ffffffffffffe120 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page Oops: 0002 [#1] PREEMPT SMP CPU: 0 PID: 5974 Comm: chown Not tainted 6.0.0-rc1-00004 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:_raw_spin_lock+0x3a/0x90 Call Trace: dquot_add_inodes+0x28/0x270 __dquot_transfer+0x377/0x840 dquot_transfer+0xde/0x540 ext4_setattr+0x405/0x14d0 notify_change+0x68e/0x9f0 chown_common+0x300/0x430 __x64_sys_fchownat+0x29/0x40 In order to avoid accessing invalid quota memory address, this patch adds block number checking of next/prev free block read from quota file. Fetch a reproducer in [Link]. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216372 Fixes: 1da177e4c3f4152 ("Linux-2.6.12-rc2") CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220923134555.2623931-2-chengzhihao1@huawei.com Signed-off-by: Zhihao Cheng Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/quota/quota_tree.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index 1a188fbdf34e..07948f6ac84e 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -80,6 +80,35 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) return ret; } +static inline int do_check_range(struct super_block *sb, const char *val_name, + uint val, uint min_val, uint max_val) +{ + if (val < min_val || val > max_val) { + quota_error(sb, "Getting %s %u out of range %u-%u", + val_name, val, min_val, max_val); + return -EUCLEAN; + } + + return 0; +} + +static int check_dquot_block_header(struct qtree_mem_dqinfo *info, + struct qt_disk_dqdbheader *dh) +{ + int err = 0; + + err = do_check_range(info->dqi_sb, "dqdh_next_free", + le32_to_cpu(dh->dqdh_next_free), 0, + info->dqi_blocks - 1); + if (err) + return err; + err = do_check_range(info->dqi_sb, "dqdh_prev_free", + le32_to_cpu(dh->dqdh_prev_free), 0, + info->dqi_blocks - 1); + + return err; +} + /* Remove empty block from list and return it */ static int get_free_dqblk(struct qtree_mem_dqinfo *info) { @@ -94,6 +123,9 @@ static int get_free_dqblk(struct qtree_mem_dqinfo *info) ret = read_blk(info, blk, buf); if (ret < 0) goto out_buf; + ret = check_dquot_block_header(info, dh); + if (ret) + goto out_buf; info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free); } else { @@ -241,6 +273,9 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, *err = read_blk(info, blk, buf); if (*err < 0) goto out_buf; + *err = check_dquot_block_header(info, dh); + if (*err) + goto out_buf; } else { blk = get_free_dqblk(info); if 
((int)blk < 0) { @@ -433,6 +468,9 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, goto out_buf; } dh = (struct qt_disk_dqdbheader *)buf; + ret = check_dquot_block_header(info, dh); + if (ret) + goto out_buf; le16_add_cpu(&dh->dqdh_entries, -1); if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ ret = remove_free_dqentry(info, buf, blk); From 1c20d672e3a5feaf7ea0eba6f1dcb82601d1bbad Mon Sep 17 00:00:00 2001 From: Patryk Duda Date: Tue, 2 Aug 2022 17:41:28 +0200 Subject: [PATCH 0027/1477] platform/chrome: cros_ec_proto: Update version on GET_NEXT_EVENT failure commit f74c7557ed0d321947e8bb4e9d47c1013f8b2227 upstream. Some EC based devices (e.g. Fingerprint MCU) can jump to RO part of the firmware (intentionally or due to device reboot). The RO part doesn't change during the device lifecycle, so it won't support newer version of EC_CMD_GET_NEXT_EVENT command. Function cros_ec_query_all() is responsible for finding maximum supported MKBP event version. It's usually called when the device is running RW part of the firmware, so the command version can be potentially higher than version supported by the RO. The problem was fixed by updating maximum supported version when the device returns EC_RES_INVALID_VERSION (mapped to -ENOPROTOOPT). That way the kernel will use highest common version supported by RO and RW.
Fixes: 3300fdd630d4 ("platform/chrome: cros_ec: handle MKBP more events flag") Cc: # 5.10+ Reviewed-by: Guenter Roeck Signed-off-by: Patryk Duda Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20220802154128.21175-1-pdk@semihalf.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/chrome/cros_ec_proto.c | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 3a2a78ff3330..8ffbf92ec1e0 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -748,6 +748,7 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, u8 event_type; u32 host_event; int ret; + u32 ver_mask; /* * Default value for wake_event. @@ -769,6 +770,37 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, return get_keyboard_state_event(ec_dev); ret = get_next_event(ec_dev); + /* + * -ENOPROTOOPT is returned when EC returns EC_RES_INVALID_VERSION. + * This can occur when EC based device (e.g. Fingerprint MCU) jumps to + * the RO image which doesn't support newer version of the command. In + * this case we will attempt to update maximum supported version of the + * EC_CMD_GET_NEXT_EVENT. + */ + if (ret == -ENOPROTOOPT) { + dev_dbg(ec_dev->dev, + "GET_NEXT_EVENT returned invalid version error.\n"); + ret = cros_ec_get_host_command_version_mask(ec_dev, + EC_CMD_GET_NEXT_EVENT, + &ver_mask); + if (ret < 0 || ver_mask == 0) + /* + * Do not change the MKBP supported version if we can't + * obtain supported version correctly. Please note that + * calling EC_CMD_GET_NEXT_EVENT returned + * EC_RES_INVALID_VERSION which means that the command + * is present. + */ + return -ENOPROTOOPT; + + ec_dev->mkbp_event_supported = fls(ver_mask); + dev_dbg(ec_dev->dev, "MKBP support version changed to %u\n", + ec_dev->mkbp_event_supported - 1); + + /* Try to get next event with new MKBP support version set. 
*/ + ret = get_next_event(ec_dev); + } + if (ret <= 0) return ret; From 9b2c82af65f7bef26e53ef9d7cb67e23648272cc Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 21 Sep 2022 16:53:53 +0200 Subject: [PATCH 0028/1477] ASoC: wcd9335: fix order of Slimbus unprepare/disable commit ea8ef003aa53ad23e7705c5cab1c4e664faa6c79 upstream. Slimbus streams are first prepared and then enabled, so the cleanup path should reverse it. The unprepare sets stream->num_ports to 0 and frees the stream->ports. Calling disable after unprepare was not really effective (channels were not deactivated) and could lead to further issues due to making transfers on unprepared stream. Fixes: 20aedafdf492 ("ASoC: wcd9335: add support to wcd9335 codec") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220921145354.1683791-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wcd9335.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c index 8f4ed39c49de..33c29a1f52d0 100644 --- a/sound/soc/codecs/wcd9335.c +++ b/sound/soc/codecs/wcd9335.c @@ -1971,8 +1971,8 @@ static int wcd9335_trigger(struct snd_pcm_substream *substream, int cmd, case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - slim_stream_unprepare(dai_data->sruntime); slim_stream_disable(dai_data->sruntime); + slim_stream_unprepare(dai_data->sruntime); break; default: break; From 8ef0e1c0ae5048879f12b01726c5ac7cfafa6820 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 21 Sep 2022 16:53:54 +0200 Subject: [PATCH 0029/1477] ASoC: wcd934x: fix order of Slimbus unprepare/disable commit e96bca7eaa5747633ec638b065630ff83728982a upstream. Slimbus streams are first prepared and then enabled, so the cleanup path should reverse it. The unprepare sets stream->num_ports to 0 and frees the stream->ports.
Calling disable after unprepare was not really effective (channels were not deactivated) and could lead to further issues due to making transfers on unprepared stream. Fixes: a61f3b4f476e ("ASoC: wcd934x: add support to wcd9340/wcd9341 codec") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220921145354.1683791-2-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wcd934x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index fd704df9b175..104751ac6cd1 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -1829,8 +1829,8 @@ static int wcd934x_trigger(struct snd_pcm_substream *substream, int cmd, case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - slim_stream_unprepare(dai_data->sruntime); slim_stream_disable(dai_data->sruntime); + slim_stream_unprepare(dai_data->sruntime); break; default: break; From 85909424a1f5ffe0667ea83ca17d1820a78bf597 Mon Sep 17 00:00:00 2001 From: Liang He Date: Fri, 16 Sep 2022 23:47:08 +0800 Subject: [PATCH 0030/1477] hwmon: (gsc-hwmon) Call of_node_get() before of_find_xxx API commit 7f62cf781e6567d59c8935dc8c6068ce2bb904b7 upstream. In gsc_hwmon_get_devtree_pdata(), we should call of_node_get() before the of_find_compatible_node() which will automatically call of_node_put() for the 'from' argument.
Fixes: 3bce5377ef66 ("hwmon: Add Gateworks System Controller support") Signed-off-by: Liang He Co-developed-by: Mengda Chen Signed-off-by: Mengda Chen Link: https://lore.kernel.org/r/20220916154708.3084515-1-chenmengda2009@163.com Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/gsc-hwmon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/gsc-hwmon.c b/drivers/hwmon/gsc-hwmon.c index 1fe37418ff46..f29ce49294da 100644 --- a/drivers/hwmon/gsc-hwmon.c +++ b/drivers/hwmon/gsc-hwmon.c @@ -267,6 +267,7 @@ gsc_hwmon_get_devtree_pdata(struct device *dev) pdata->nchannels = nchannels; /* fan controller base address */ + of_node_get(dev->parent->of_node); fan = of_find_compatible_node(dev->parent->of_node, NULL, "gw,gsc-fan"); if (fan && of_property_read_u32(fan, "reg", &pdata->fan_base)) { dev_err(dev, "fan node without base\n"); From ffb571e1232f4afc62294bc215c0e9f68a9fc35e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 9 Sep 2022 13:25:29 +0200 Subject: [PATCH 0031/1477] regulator: qcom_rpm: Fix circular deferral regression commit 8478ed5844588703a1a4c96a004b1525fbdbdd5e upstream. On recent kernels, the PM8058 L16 (or any other PM8058 LDO-regulator) does not come up if they are supplied by an SMPS-regulator. This is not very strange since the regulators are registered in a long array and the L-regulators are registered before the S-regulators, and if an L-regulator defers, it will never get around to registering the S-regulator that it needs. See arch/arm/boot/dts/qcom-apq8060-dragonboard.dts: pm8058-regulators { (...) vdd_l13_l16-supply = <&pm8058_s4>; (...) Ooops. Fix this by moving the PM8058 S-regulators first in the array. Do the same for the PM8901 S-regulators (though this is currently not causing any problems with our device trees) so that the pattern of registration order is the same on all PMnnnn chips.
Fixes: 087a1b5cdd55 ("regulator: qcom: Rework to single platform device") Cc: stable@vger.kernel.org Cc: Andy Gross Cc: Bjorn Andersson Cc: Konrad Dybcio Cc: linux-arm-msm@vger.kernel.org Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220909112529.239143-1-linus.walleij@linaro.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/qcom_rpm-regulator.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/regulator/qcom_rpm-regulator.c b/drivers/regulator/qcom_rpm-regulator.c index 7f9d66ac37ff..3c41b71a1f52 100644 --- a/drivers/regulator/qcom_rpm-regulator.c +++ b/drivers/regulator/qcom_rpm-regulator.c @@ -802,6 +802,12 @@ static const struct rpm_regulator_data rpm_pm8018_regulators[] = { }; static const struct rpm_regulator_data rpm_pm8058_regulators[] = { + { "s0", QCOM_RPM_PM8058_SMPS0, &pm8058_smps, "vdd_s0" }, + { "s1", QCOM_RPM_PM8058_SMPS1, &pm8058_smps, "vdd_s1" }, + { "s2", QCOM_RPM_PM8058_SMPS2, &pm8058_smps, "vdd_s2" }, + { "s3", QCOM_RPM_PM8058_SMPS3, &pm8058_smps, "vdd_s3" }, + { "s4", QCOM_RPM_PM8058_SMPS4, &pm8058_smps, "vdd_s4" }, + { "l0", QCOM_RPM_PM8058_LDO0, &pm8058_nldo, "vdd_l0_l1_lvs" }, { "l1", QCOM_RPM_PM8058_LDO1, &pm8058_nldo, "vdd_l0_l1_lvs" }, { "l2", QCOM_RPM_PM8058_LDO2, &pm8058_pldo, "vdd_l2_l11_l12" }, @@ -829,12 +835,6 @@ static const struct rpm_regulator_data rpm_pm8058_regulators[] = { { "l24", QCOM_RPM_PM8058_LDO24, &pm8058_nldo, "vdd_l23_l24_l25" }, { "l25", QCOM_RPM_PM8058_LDO25, &pm8058_nldo, "vdd_l23_l24_l25" }, - { "s0", QCOM_RPM_PM8058_SMPS0, &pm8058_smps, "vdd_s0" }, - { "s1", QCOM_RPM_PM8058_SMPS1, &pm8058_smps, "vdd_s1" }, - { "s2", QCOM_RPM_PM8058_SMPS2, &pm8058_smps, "vdd_s2" }, - { "s3", QCOM_RPM_PM8058_SMPS3, &pm8058_smps, "vdd_s3" }, - { "s4", QCOM_RPM_PM8058_SMPS4, &pm8058_smps, "vdd_s4" }, - { "lvs0", QCOM_RPM_PM8058_LVS0, &pm8058_switch, "vdd_l0_l1_lvs" }, { "lvs1", QCOM_RPM_PM8058_LVS1, &pm8058_switch, "vdd_l0_l1_lvs" 
}, @@ -843,6 +843,12 @@ static const struct rpm_regulator_data rpm_pm8058_regulators[] = { }; static const struct rpm_regulator_data rpm_pm8901_regulators[] = { + { "s0", QCOM_RPM_PM8901_SMPS0, &pm8901_ftsmps, "vdd_s0" }, + { "s1", QCOM_RPM_PM8901_SMPS1, &pm8901_ftsmps, "vdd_s1" }, + { "s2", QCOM_RPM_PM8901_SMPS2, &pm8901_ftsmps, "vdd_s2" }, + { "s3", QCOM_RPM_PM8901_SMPS3, &pm8901_ftsmps, "vdd_s3" }, + { "s4", QCOM_RPM_PM8901_SMPS4, &pm8901_ftsmps, "vdd_s4" }, + { "l0", QCOM_RPM_PM8901_LDO0, &pm8901_nldo, "vdd_l0" }, { "l1", QCOM_RPM_PM8901_LDO1, &pm8901_pldo, "vdd_l1" }, { "l2", QCOM_RPM_PM8901_LDO2, &pm8901_pldo, "vdd_l2" }, @@ -851,12 +857,6 @@ static const struct rpm_regulator_data rpm_pm8901_regulators[] = { { "l5", QCOM_RPM_PM8901_LDO5, &pm8901_pldo, "vdd_l5" }, { "l6", QCOM_RPM_PM8901_LDO6, &pm8901_pldo, "vdd_l6" }, - { "s0", QCOM_RPM_PM8901_SMPS0, &pm8901_ftsmps, "vdd_s0" }, - { "s1", QCOM_RPM_PM8901_SMPS1, &pm8901_ftsmps, "vdd_s1" }, - { "s2", QCOM_RPM_PM8901_SMPS2, &pm8901_ftsmps, "vdd_s2" }, - { "s3", QCOM_RPM_PM8901_SMPS3, &pm8901_ftsmps, "vdd_s3" }, - { "s4", QCOM_RPM_PM8901_SMPS4, &pm8901_ftsmps, "vdd_s4" }, - { "lvs0", QCOM_RPM_PM8901_LVS0, &pm8901_switch, "lvs0_in" }, { "lvs1", QCOM_RPM_PM8901_LVS1, &pm8901_switch, "lvs1_in" }, { "lvs2", QCOM_RPM_PM8901_LVS2, &pm8901_switch, "lvs2_in" }, From 2ce9fab94b8db61f014e43ddf80dd1524ae6dff4 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Thu, 22 Sep 2022 22:56:06 +0100 Subject: [PATCH 0032/1477] RISC-V: Make port I/O string accessors actually work commit 9cc205e3c17d5716da7ebb7fa0c985555e95d009 upstream. Fix port I/O string accessors such as `insb', `outsb', etc. 
which use the physical PCI port I/O address rather than the corresponding memory mapping to get at the requested location, which in turn breaks at least accesses made by our parport driver to a PCIe parallel port such as: PCI parallel port detected: 1415:c118, I/O at 0x1000(0x1008), IRQ 20 parport0: PC-style at 0x1000 (0x1008), irq 20, using FIFO [PCSPP,TRISTATE,COMPAT,EPP,ECP] causing a memory access fault: Unable to handle kernel access to user memory without uaccess routines at virtual address 0000000000001008 Oops [#1] Modules linked in: CPU: 1 PID: 350 Comm: cat Not tainted 6.0.0-rc2-00283-g10d4879f9ef0-dirty #23 Hardware name: SiFive HiFive Unmatched A00 (DT) epc : parport_pc_fifo_write_block_pio+0x266/0x416 ra : parport_pc_fifo_write_block_pio+0xb4/0x416 epc : ffffffff80542c3e ra : ffffffff80542a8c sp : ffffffd88899fc60 gp : ffffffff80fa2700 tp : ffffffd882b1e900 t0 : ffffffd883d0b000 t1 : ffffffffff000002 t2 : 4646393043330a38 s0 : ffffffd88899fcf0 s1 : 0000000000001000 a0 : 0000000000000010 a1 : 0000000000000000 a2 : ffffffd883d0a010 a3 : 0000000000000023 a4 : 00000000ffff8fbb a5 : ffffffd883d0a001 a6 : 0000000100000000 a7 : ffffffc800000000 s2 : ffffffffff000002 s3 : ffffffff80d28880 s4 : ffffffff80fa1f50 s5 : 0000000000001008 s6 : 0000000000000008 s7 : ffffffd883d0a000 s8 : 0004000000000000 s9 : ffffffff80dc1d80 s10: ffffffd8807e4000 s11: 0000000000000000 t3 : 00000000000000ff t4 : 393044410a303930 t5 : 0000000000001000 t6 : 0000000000040000 status: 0000000200000120 badaddr: 0000000000001008 cause: 000000000000000f [] parport_pc_compat_write_block_pio+0xfe/0x200 [] parport_write+0x46/0xf8 [] lp_write+0x158/0x2d2 [] vfs_write+0x8e/0x2c2 [] ksys_write+0x52/0xc2 [] sys_write+0xe/0x16 [] ret_from_syscall+0x0/0x2 ---[ end trace 0000000000000000 ]--- For simplicity address the problem by adding PCI_IOBASE to the physical address requested in the respective wrapper macros only, observing that the raw accessors such as `__insb', `__outsb', etc. 
are not supposed to be used other than by said macros. Remove the cast to `long' that is no longer needed on `addr' now that it is used as an offset from PCI_IOBASE and add parentheses around `addr' needed for predictable evaluation in macro expansion. No need to make said adjustments in separate changes given that current code is gravely broken and does not ever work. Signed-off-by: Maciej W. Rozycki Fixes: fab957c11efe2 ("RISC-V: Atomic and Locking Code") Cc: stable@vger.kernel.org # v4.15+ Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209220223080.29493@angie.orcam.me.uk Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/io.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index c025a746a148..391dd869db64 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -114,9 +114,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar(addr)) __io_reads_ins(ins, u8, b, __io_pbr(), __io_par(addr)) __io_reads_ins(ins, u16, w, __io_pbr(), __io_par(addr)) __io_reads_ins(ins, u32, l, __io_pbr(), __io_par(addr)) -#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count) -#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count) -#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count) +#define insb(addr, buffer, count) __insb(PCI_IOBASE + (addr), buffer, count) +#define insw(addr, buffer, count) __insw(PCI_IOBASE + (addr), buffer, count) +#define insl(addr, buffer, count) __insl(PCI_IOBASE + (addr), buffer, count) __io_writes_outs(writes, u8, b, __io_bw(), __io_aw()) __io_writes_outs(writes, u16, w, __io_bw(), __io_aw()) @@ -128,22 +128,22 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw()) __io_writes_outs(outs, u8, b, __io_pbw(), __io_paw()) __io_writes_outs(outs, u16, w, __io_pbw(), __io_paw()) 
__io_writes_outs(outs, u32, l, __io_pbw(), __io_paw()) -#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count) -#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count) -#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count) +#define outsb(addr, buffer, count) __outsb(PCI_IOBASE + (addr), buffer, count) +#define outsw(addr, buffer, count) __outsw(PCI_IOBASE + (addr), buffer, count) +#define outsl(addr, buffer, count) __outsl(PCI_IOBASE + (addr), buffer, count) #ifdef CONFIG_64BIT __io_reads_ins(reads, u64, q, __io_br(), __io_ar(addr)) #define readsq(addr, buffer, count) __readsq(addr, buffer, count) __io_reads_ins(ins, u64, q, __io_pbr(), __io_par(addr)) -#define insq(addr, buffer, count) __insq((void __iomem *)addr, buffer, count) +#define insq(addr, buffer, count) __insq(PCI_IOBASE + (addr), buffer, count) __io_writes_outs(writes, u64, q, __io_bw(), __io_aw()) #define writesq(addr, buffer, count) __writesq(addr, buffer, count) __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) -#define outsq(addr, buffer, count) __outsq((void __iomem *)addr, buffer, count) +#define outsq(addr, buffer, count) __outsq(PCI_IOBASE + (addr), buffer, count) #endif #include From a6dcc6cfa2934afe2133f44241a3a1b4c758cc2b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 14 Oct 2022 10:13:55 +0200 Subject: [PATCH 0033/1477] parisc: fbdev/stifb: Align graphics memory size to 4MB commit aca7c13d3bee81a968337a5515411409ae9d095d upstream. Independent of the current graphics resolution, adjust the reported graphics card memory size to the next 4MB boundary. This fixes the fbtest program which expects a naturally aligned size.
Signed-off-by: Helge Deller Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/stifb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 002f265d8db5..b0470f4f595e 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -1257,7 +1257,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) /* limit fbsize to max visible screen size */ if (fix->smem_len > yres*fix->line_length) - fix->smem_len = yres*fix->line_length; + fix->smem_len = ALIGN(yres*fix->line_length, 4*1024*1024); fix->accel = FB_ACCEL_NONE; From d8c6f9b2e1948700e9e0c33b4829de9fbf8d547e Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Thu, 15 Sep 2022 15:37:02 -0400 Subject: [PATCH 0034/1477] riscv: Allow PROT_WRITE-only mmap() commit 9e2e6042a7ec6504fe8e366717afa2f40cf16488 upstream. Commit 2139619bcad7 ("riscv: mmap with PROT_WRITE but no PROT_READ is invalid") made mmap() return EINVAL if PROT_WRITE was set without PROT_READ with the justification that a write-only PTE is considered a reserved PTE permission bit pattern in the privileged spec. This check is unnecessary since we let VM_WRITE imply VM_READ on RISC-V, and it is inconsistent with other architectures that don't support write-only PTEs, creating a potential software portability issue. Just remove the check altogether and let PROT_WRITE imply PROT_READ as is the case on other architectures. Note that this also allows PROT_WRITE|PROT_EXEC mappings which were disallowed prior to the aforementioned commit; PROT_READ is implied in such mappings as well.
Fixes: 2139619bcad7 ("riscv: mmap with PROT_WRITE but no PROT_READ is invalid") Reviewed-by: Atish Patra Signed-off-by: Andrew Bresticker Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220915193702.2201018-3-abrestic@rivosinc.com/ Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/sys_riscv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 8a7880b9c433..bb402685057a 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -18,9 +18,6 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; - if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) - return -EINVAL; - return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> (PAGE_SHIFT - page_shift_offset)); } From d15dca1d46ab6fcdb17914cf780989bdd49db9f5 Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Thu, 15 Sep 2022 15:37:01 -0400 Subject: [PATCH 0035/1477] riscv: Make VM_WRITE imply VM_READ commit 7ab72c597356be1e7f0f3d856e54ce78527f43c8 upstream. RISC-V does not presently have write-only mappings as that PTE bit pattern is considered reserved in the privileged spec, so allow handling of read faults in VMAs that have VM_WRITE without VM_READ in order to be consistent with other architectures that have similar limitations. 
Fixes: 2139619bcad7 ("riscv: mmap with PROT_WRITE but no PROT_READ is invalid") Reviewed-by: Atish Patra Signed-off-by: Andrew Bresticker Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220915193702.2201018-2-abrestic@rivosinc.com/ Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/mm/fault.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 3c8b9e433c67..8f84bbe0ac33 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -167,7 +167,8 @@ static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) } break; case EXC_LOAD_PAGE_FAULT: - if (!(vma->vm_flags & VM_READ)) { + /* Write implies read */ + if (!(vma->vm_flags & (VM_READ | VM_WRITE))) { return true; } break; From 1a053f597f42941eeebefc1b686e7c2ccd24104e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 18 Sep 2022 02:29:34 -0700 Subject: [PATCH 0036/1477] riscv: Pass -mno-relax only on lld < 15.0.0 commit 3cebf80e9a0d3adcb174053be32c88a640b3344b upstream. lld since llvm:6611d58f5bbc ("[ELF] Relax R_RISCV_ALIGN"), which will be included in the 15.0.0 release, has implemented some RISC-V linker relaxation. 
-mno-relax is no longer needed in KBUILD_CFLAGS/KBUILD_AFLAGS to suppress R_RISCV_ALIGN which older lld can not handle: ld.lld: error: capability.c:(.fixup+0x0): relocation R_RISCV_ALIGN requires unimplemented linker relaxation; recompile with -mno-relax but the .o is already compiled with -mno-relax Signed-off-by: Fangrui Song Link: https://lore.kernel.org/r/20220710071117.446112-1-maskray@google.com/ Link: https://lore.kernel.org/r/20220918092933.19943-1-palmer@rivosinc.com Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Tested-by: Nathan Chancellor Tested-by: Conor Dooley Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 3d3016092b31..1bb1bf1141cc 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -37,6 +37,7 @@ else endif ifeq ($(CONFIG_LD_IS_LLD),y) +ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 150000; echo $$?),0) KBUILD_CFLAGS += -mno-relax KBUILD_AFLAGS += -mno-relax ifneq ($(LLVM_IAS),1) @@ -44,6 +45,7 @@ ifneq ($(LLVM_IAS),1) KBUILD_AFLAGS += -Wa,-mno-relax endif endif +endif # ISA string setting riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima From 6a73e6edcbf3cdd82796dcdf0c0f5fe5d91021af Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 12 Jul 2022 15:52:55 +0800 Subject: [PATCH 0037/1477] UM: cpuinfo: Fix a warning for CONFIG_CPUMASK_OFFSTACK commit 16c546e148fa6d14a019431436a6f7b4087dbccd upstream. When CONFIG_CPUMASK_OFFSTACK and CONFIG_DEBUG_PER_CPU_MAPS is selected, cpu_max_bits_warn() generates a runtime warning similar as below while we show /proc/cpuinfo. Fix this by using nr_cpu_ids (the runtime limit) instead of NR_CPUS to iterate CPUs. 
[ 3.052463] ------------[ cut here ]------------ [ 3.059679] WARNING: CPU: 3 PID: 1 at include/linux/cpumask.h:108 show_cpuinfo+0x5e8/0x5f0 [ 3.070072] Modules linked in: efivarfs autofs4 [ 3.076257] CPU: 0 PID: 1 Comm: systemd Not tainted 5.19-rc5+ #1052 [ 3.099465] Stack : 9000000100157b08 9000000000f18530 9000000000cf846c 9000000100154000 [ 3.109127] 9000000100157a50 0000000000000000 9000000100157a58 9000000000ef7430 [ 3.118774] 90000001001578e8 0000000000000040 0000000000000020 ffffffffffffffff [ 3.128412] 0000000000aaaaaa 1ab25f00eec96a37 900000010021de80 900000000101c890 [ 3.138056] 0000000000000000 0000000000000000 0000000000000000 0000000000aaaaaa [ 3.147711] ffff8000339dc220 0000000000000001 0000000006ab4000 0000000000000000 [ 3.157364] 900000000101c998 0000000000000004 9000000000ef7430 0000000000000000 [ 3.167012] 0000000000000009 000000000000006c 0000000000000000 0000000000000000 [ 3.176641] 9000000000d3de08 9000000001639390 90000000002086d8 00007ffff0080286 [ 3.186260] 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1c [ 3.195868] ... 
[ 3.199917] Call Trace: [ 3.203941] [<90000000002086d8>] show_stack+0x38/0x14c [ 3.210666] [<9000000000cf846c>] dump_stack_lvl+0x60/0x88 [ 3.217625] [<900000000023d268>] __warn+0xd0/0x100 [ 3.223958] [<9000000000cf3c90>] warn_slowpath_fmt+0x7c/0xcc [ 3.231150] [<9000000000210220>] show_cpuinfo+0x5e8/0x5f0 [ 3.238080] [<90000000004f578c>] seq_read_iter+0x354/0x4b4 [ 3.245098] [<90000000004c2e90>] new_sync_read+0x17c/0x1c4 [ 3.252114] [<90000000004c5174>] vfs_read+0x138/0x1d0 [ 3.258694] [<90000000004c55f8>] ksys_read+0x70/0x100 [ 3.265265] [<9000000000cfde9c>] do_syscall+0x7c/0x94 [ 3.271820] [<9000000000202fe4>] handle_syscall+0xc4/0x160 [ 3.281824] ---[ end trace 8b484262b4b8c24c ]--- Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- arch/um/kernel/um_arch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 52e2e2a3e4ae..00c6dce14bd2 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -77,7 +77,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + return *pos < nr_cpu_ids ? cpu_data + *pos : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) From 303436e301ba72021c3894c7a44bcfc1dcf17122 Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Tue, 20 Sep 2022 19:19:32 +0000 Subject: [PATCH 0038/1477] nvme-pci: set min_align_mask before calculating max_hw_sectors commit 61ce339f19fabbc3e51237148a7ef6f2270e44fa upstream. If swiotlb is force enabled dma_max_mapping_size ends up calling swiotlb_max_mapping_size which takes into account the min align mask for the device. Set the min align mask for nvme driver before calling dma_max_mapping_size while calculating max hw sectors. 
Signed-off-by: Rishabh Bhatnagar Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ce129655ef0a..65f4bf880608 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2624,6 +2624,8 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out_unlock; + dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); + /* * Limit the max command size to prevent iod->sg allocations going * over a single page. @@ -2636,7 +2638,6 @@ static void nvme_reset_work(struct work_struct *work) * Don't limit the IOMMU merged segment size. */ dma_set_max_seg_size(dev->dev, 0xffffffff); - dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); mutex_unlock(&dev->shutdown_lock); From f134f261d76ae3d5ecf68db642eaa746ceb84cfb Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 30 Jun 2022 23:07:19 +0300 Subject: [PATCH 0039/1477] drm/virtio: Check whether transferred 2D BO is shmem commit e473216b42aa1fd9fc6b94b608b42c210c655908 upstream. Transferred 2D BO always must be a shmem BO. Add check for that to prevent NULL dereference if userspace passes a VRAM BO. 
Cc: stable@vger.kernel.org Reviewed-by: Emil Velikov Signed-off-by: Dmitry Osipenko Link: http://patchwork.freedesktop.org/patch/msgid/20220630200726.1884320-3-dmitry.osipenko@collabora.com Signed-off-by: Gerd Hoffmann Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/virtio/virtgpu_vq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 5e40fa0f5e8f..e98a29d243c0 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -601,7 +601,7 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev, bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev); struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo); - if (use_dma_api) + if (virtio_gpu_is_shmem(bo) && use_dma_api) dma_sync_sgtable_for_device(vgdev->vdev->dev.parent, shmem->pages, DMA_TO_DEVICE); From 0a129790893b765bef4ba02fcdeb73cbc56d5b99 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 8 Sep 2022 11:51:04 +0200 Subject: [PATCH 0040/1477] drm/udl: Restore display mode on resume commit 6d6e732835db92e66c28dbcf258a7e3d3c71420d upstream. Restore the display mode when resuming from suspend. Currently, the display remains dark. On resume, the CRTC's mode does not change, but the 'active' flag changes to 'true'. Taking this into account when considering a mode switch restores the display mode. The bug is reproducible by using Gnome with udl and observing the adapter's suspend/resume behavior. Actually, the whole check added in udl_simple_display_pipe_enable() about the crtc_state->mode_changed was bogus. We should drop the whole check and always apply the mode change in this function.
[ tiwai -- Drop the mode_changed check entirely instead, per Daniel's suggestion ] Fixes: 997d33c35618 ("drm/udl: Inline DPMS code into CRTC enable and disable functions") Cc: Suggested-by: Daniel Vetter Reviewed-by: Daniel Vetter Signed-off-by: Takashi Iwai Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220908095115.23396-2-tiwai@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_modeset.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index edcfd8c120c4..209b5ceba3e0 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -400,9 +400,6 @@ udl_simple_display_pipe_enable(struct drm_simple_display_pipe *pipe, udl_handle_damage(fb, 0, 0, fb->width, fb->height); - if (!crtc_state->mode_changed) - return; - /* enable display */ udl_crtc_write_mode_to_hw(crtc); } From 7d551b7d611493bcb964722e65c98d6b2b26a0d1 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Wed, 2 Dec 2020 19:11:45 +0800 Subject: [PATCH 0041/1477] block: fix inflight statistics of part0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b0d97557ebfc9d5ba5f2939339a9fdd267abafeb upstream. The inflight of partition 0 doesn't include inflight IOs to all sub-partitions, since currently mq calculates inflight of specific partition by simply comparing the value of the partition pointer. Thus the following case is possible: $ cat /sys/block/vda/inflight        0        0 $ cat /sys/block/vda/vda1/inflight        0      128 While single queue device (on a previous version, e.g. v3.10) does not have this issue: $cat /sys/block/sda/sda3/inflight 0 33 $cat /sys/block/sda/inflight 0 33 Partition 0 should be specially handled since it represents the whole disk. This issue is introduced since commit bf0ddaba65dd ("blk-mq: fix sysfs inflight counter").
Besides, this patch can also fix the inflight statistics of part 0 in /proc/diskstats. Before this patch, the inflight statistics of part 0 doesn't include that of sub partitions. (I have marked the 'inflight' field with asterisk.) $cat /proc/diskstats 259 0 nvme0n1 45974469 0 367814768 6445794 1 0 1 0 *0* 111062 6445794 0 0 0 0 0 0 259 2 nvme0n1p1 45974058 0 367797952 6445727 0 0 0 0 *33* 111001 6445727 0 0 0 0 0 0 This is introduced since commit f299b7c7a9de ("blk-mq: provide internal in-flight variant"). Fixes: bf0ddaba65dd ("blk-mq: fix sysfs inflight counter") Fixes: f299b7c7a9de ("blk-mq: provide internal in-flight variant") Signed-off-by: Jeffle Xu Reviewed-by: Christoph Hellwig [axboe: adapt for 5.11 partition change] Signed-off-by: Jens Axboe [khazhy: adapt for 5.10 partition] Signed-off-by: Khazhismel Kumykov Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index cfc039fabf8c..e37ba792902a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -105,7 +105,8 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, { struct mq_inflight *mi = priv; - if (rq->part == mi->part && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) + if ((!mi->part->partno || rq->part == mi->part) && + blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) mi->inflight[rq_data_dir(rq)]++; return true; From a3c08c021778dad30f69895e378843e9f423d734 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Fri, 30 Sep 2022 00:38:43 +0000 Subject: [PATCH 0042/1477] mm/mmap: undo ->mmap() when arch_validate_flags() fails commit deb0f6562884b5b4beb883d73e66a7d3a1b96d99 upstream. Commit c462ac288f2c ("mm: Introduce arch_validate_flags()") added a late check in mmap_region() to let architectures validate vm_flags. The check needs to happen after calling ->mmap() as the flags can potentially be modified during this callback. If arch_validate_flags() check fails we unmap and free the vma. 
However, the error path fails to undo the ->mmap() call that previously succeeded and depending on the specific ->mmap() implementation this translates to reference increments, memory allocations and other operations that will not be cleaned up. There are several places (mainly device drivers) where this is an issue. However, one specific example is bpf_map_mmap() which keeps count of the mappings in map->writecnt. The count is incremented on ->mmap() and then decremented on vm_ops->close(). When arch_validate_flags() fails this count is off since bpf_map_mmap_close() is never called. One can reproduce this issue in arm64 devices with MTE support. Here the vm_flags are checked to only allow VM_MTE if VM_MTE_ALLOWED has been set previously. From userspace it is then enough to pass the PROT_MTE flag to the mmap() syscall to trigger the arch_validate_flags() failure. The following program reproduces this issue: #include <stdio.h> #include <unistd.h> #include <linux/unistd.h> #include <linux/bpf.h> #include <sys/mman.h> int main(void) { union bpf_attr attr = { .map_type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(long long), .max_entries = 256, .map_flags = BPF_F_MMAPABLE, }; int fd; fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); mmap(NULL, 4096, PROT_WRITE | PROT_MTE, MAP_SHARED, fd, 0); return 0; } By manually adding some log statements to the vm_ops callbacks we can confirm that when passing PROT_MTE to mmap() the map->writecnt is off upon ->release(): With PROT_MTE flag: root@debian:~# ./bpf-test [ 111.263874] bpf_map_write_active_inc: map=9 writecnt=1 [ 111.288763] bpf_map_release: map=9 writecnt=1 Without PROT_MTE flag: root@debian:~# ./bpf-test [ 157.816912] bpf_map_write_active_inc: map=10 writecnt=1 [ 157.830442] bpf_map_write_active_dec: map=10 writecnt=0 [ 157.832396] bpf_map_release: map=10 writecnt=0 This patch fixes the above issue by calling vm_ops->close() when the arch_validate_flags() check fails, after this we can proceed to unmap and free the vma on the error path.
Link: https://lkml.kernel.org/r/20220930003844.1210987-1-cmllamas@google.com Fixes: c462ac288f2c ("mm: Introduce arch_validate_flags()") Signed-off-by: Carlos Llamas Reviewed-by: Catalin Marinas Acked-by: Andrii Nakryiko Reviewed-by: Liam Howlett Cc: Christian Brauner (Microsoft) Cc: Michal Hocko Cc: Suren Baghdasaryan Cc: [5.10+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/mmap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/mmap.c b/mm/mmap.c index 31fc116a8ec9..33ebda8385b9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1856,7 +1856,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, if (!arch_validate_flags(vma->vm_flags)) { error = -EINVAL; if (file) - goto unmap_and_free_vma; + goto close_and_free_vma; else goto free_vma; } @@ -1900,6 +1900,9 @@ out: return addr; +close_and_free_vma: + if (vma->vm_ops && vma->vm_ops->close) + vma->vm_ops->close(vma); unmap_and_free_vma: vma->vm_file = NULL; fput(file); From 9119a92ad93eae59ac3238949976ccd568baee1f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 21 Sep 2022 20:49:16 +0100 Subject: [PATCH 0043/1477] PCI: Sanitise firmware BAR assignments behind a PCI-PCI bridge commit 0e32818397426a688f598f35d3bc762eca6d7592 upstream. When pci_assign_resource() is unable to assign resources to a BAR, it uses pci_revert_fw_address() to fall back to a firmware assignment (if any). Previously pci_revert_fw_address() assumed all addresses could reach the device, but this is not true if the device is below a bridge that only forwards addresses within its windows. 
This problem was observed on a Tyan Tomcat IV S1564D system where the BIOS did not assign valid addresses to several bridges and USB devices: pci 0000:00:11.0: PCI-to-PCIe bridge to [bus 01-ff] pci 0000:00:11.0: bridge window [io 0xe000-0xefff] pci 0000:01:00.0: PCIe Upstream Port to [bus 02-ff] pci 0000:01:00.0: bridge window [io 0x0000-0x0fff] # unreachable pci 0000:02:02.0: PCIe Downstream Port to [bus 05-ff] pci 0000:02:02.0: bridge window [io 0x0000-0x0fff] # unreachable pci 0000:05:00.0: PCIe-to-PCI bridge to [bus 06-ff] pci 0000:05:00.0: bridge window [io 0x0000-0x0fff] # unreachable pci 0000:06:08.0: USB UHCI 1.1 pci 0000:06:08.0: BAR 4: [io 0xfce0-0xfcff] # unreachable pci 0000:06:08.1: USB UHCI 1.1 pci 0000:06:08.1: BAR 4: [io 0xfce0-0xfcff] # unreachable pci 0000:06:08.0: can't claim BAR 4 [io 0xfce0-0xfcff]: no compatible bridge window pci 0000:06:08.1: can't claim BAR 4 [io 0xfce0-0xfcff]: no compatible bridge window During the first pass of assigning unassigned resources, there was not enough I/O space available, so we couldn't assign the 06:08.0 BAR and reverted to the firmware assignment (still unreachable). 
Reverting the 06:08.1 assignment failed because it conflicted with 06:08.0: pci 0000:00:11.0: bridge window [io 0xe000-0xefff] pci 0000:01:00.0: no space for bridge window [io size 0x2000] pci 0000:02:02.0: no space for bridge window [io size 0x1000] pci 0000:05:00.0: no space for bridge window [io size 0x1000] pci 0000:06:08.0: BAR 4: no space for [io size 0x0020] pci 0000:06:08.0: BAR 4: trying firmware assignment [io 0xfce0-0xfcff] pci 0000:06:08.1: BAR 4: no space for [io size 0x0020] pci 0000:06:08.1: BAR 4: trying firmware assignment [io 0xfce0-0xfcff] pci 0000:06:08.1: BAR 4: [io 0xfce0-0xfcff] conflicts with 0000:06:08.0 [io 0xfce0-0xfcff] A subsequent pass assigned valid bridge windows and a valid 06:08.1 BAR, but left the 06:08.0 BAR alone, so the UHCI device was still unusable: pci 0000:00:11.0: bridge window [io 0xe000-0xefff] released pci 0000:00:11.0: bridge window [io 0x1000-0x2fff] # reassigned pci 0000:01:00.0: bridge window [io 0x1000-0x2fff] # reassigned pci 0000:02:02.0: bridge window [io 0x2000-0x2fff] # reassigned pci 0000:05:00.0: bridge window [io 0x2000-0x2fff] # reassigned pci 0000:06:08.0: BAR 4: assigned [io 0xfce0-0xfcff] # left alone pci 0000:06:08.1: BAR 4: assigned [io 0x2000-0x201f] ... uhci_hcd 0000:06:08.0: host system error, PCI problems? uhci_hcd 0000:06:08.0: host controller process error, something bad happened! uhci_hcd 0000:06:08.0: host controller halted, very bad! uhci_hcd 0000:06:08.0: HCRESET not completed yet! uhci_hcd 0000:06:08.0: HC died; cleaning up If the address assigned by firmware is not reachable because it's not within upstream bridge windows, fail instead of assigning the unusable address from firmware. 
[bhelgaas: commit log, use pci_upstream_bridge()] Link: https://bugzilla.kernel.org/show_bug.cgi?id=16263 Link: https://lore.kernel.org/r/alpine.DEB.2.21.2203012338460.46819@angie.orcam.me.uk Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209211921250.29493@angie.orcam.me.uk Fixes: 58c84eda0756 ("PCI: fall back to original BIOS BAR addresses") Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v2.6.35+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/setup-res.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 7f1acb3918d0..875d50c16f19 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -210,6 +210,17 @@ static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, root = pci_find_parent_resource(dev, res); if (!root) { + /* + * If dev is behind a bridge, accesses will only reach it + * if res is inside the relevant bridge window. + */ + if (pci_upstream_bridge(dev)) + return -ENXIO; + + /* + * On the root bus, assume the host bridge will forward + * everything. + */ if (res->flags & IORESOURCE_IO) root = &ioport_resource; else From 7db60fd46e0c8492f72551010f50fc6b39909fbb Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sat, 24 Sep 2022 13:47:36 +0800 Subject: [PATCH 0044/1477] powercap: intel_rapl: Use standard Energy Unit for SPR Dram RAPL domain commit 4c081324df5608b73428662ca54d5221ea03a6bd upstream. Intel Xeon servers used to use a fixed energy resolution (15.3uj) for Dram RAPL domain. But on SPR, Dram RAPL domain follows the standard energy resolution as described in MSR_RAPL_POWER_UNIT. Remove the SPR dram_domain_energy_unit quirk. Fixes: 2d798d9f5967 ("powercap: intel_rapl: add support for Sapphire Rapids") Signed-off-by: Zhang Rui Tested-by: Wang Wendy Cc: 5.9+ # 5.9+ Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/powercap/intel_rapl_common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 70d6d52bc1e2..a13a07f475d2 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -979,7 +979,6 @@ static const struct rapl_defaults rapl_defaults_spr_server = { .check_unit = rapl_check_unit_core, .set_floor_freq = set_floor_freq_default, .compute_time_window = rapl_compute_time_window_core, - .dram_domain_energy_unit = 15300, .psys_domain_energy_unit = 1000000000, }; From 102c4b6e8c4b5ba8571df8f88d45730ebcb7f5a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 27 Aug 2022 15:44:54 +0200 Subject: [PATCH 0045/1477] powerpc/boot: Explicitly disable usage of SPE instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 110a58b9f91c66f743c01a2c217243d94c899c23 upstream. uImage boot wrapper should not use SPE instructions, like kernel itself. Boot wrapper has already disabled Altivec and VSX instructions but not SPE. Options -mno-spe and -mspe=no are already set when compiling the kernel, but not yet when compiling the uImage wrapper. Fix it.
Cc: stable@vger.kernel.org Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220827134454.17365-1-pali@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/boot/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index e4b364b5da9e..8b78eba755f9 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -30,6 +30,7 @@ endif BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \ + $(call cc-option,-mno-spe) $(call cc-option,-mspe=no) \ -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ $(LINUXINCLUDE) From 9931bd05bb8d1d8602d3834bb6ba0c6d87a451b7 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 19 Sep 2022 06:44:34 -0700 Subject: [PATCH 0046/1477] scsi: qedf: Populate sysfs attributes for vport commit 592642e6b11e620e4b43189f8072752429fc8dc3 upstream. Few vport parameters were displayed by systool as 'Unknown' or 'NULL'. Copy speed, supported_speed, frame_size and update port_type for NPIV port. Link: https://lore.kernel.org/r/20220919134434.3513-1-njavali@marvell.com Cc: stable@vger.kernel.org Tested-by: Guangwu Zhang Reviewed-by: John Meneghini Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Signed-off-by: Martin K. 
Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qedf/qedf_main.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index de5b6453827c..f48ef47546f4 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -1917,6 +1917,27 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled) fc_vport_setlink(vn_port); } + /* Set symbolic node name */ + if (base_qedf->pdev->device == QL45xxx) + snprintf(fc_host_symbolic_name(vn_port->host), 256, + "Marvell FastLinQ 45xxx FCoE v%s", QEDF_VERSION); + + if (base_qedf->pdev->device == QL41xxx) + snprintf(fc_host_symbolic_name(vn_port->host), 256, + "Marvell FastLinQ 41xxx FCoE v%s", QEDF_VERSION); + + /* Set supported speed */ + fc_host_supported_speeds(vn_port->host) = n_port->link_supported_speeds; + + /* Set speed */ + vn_port->link_speed = n_port->link_speed; + + /* Set port type */ + fc_host_port_type(vn_port->host) = FC_PORTTYPE_NPIV; + + /* Set maxframe size */ + fc_host_maxframe_size(vn_port->host) = n_port->mfs; + QEDF_INFO(&(base_qedf->dbg_ctx), QEDF_LOG_NPIV, "vn_port=%p.\n", vn_port); From e50472949604f385e09ce3fa4e74dce9f44fb19b Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sun, 25 Sep 2022 06:32:43 -0700 Subject: [PATCH 0047/1477] fbdev: smscufx: Fix use-after-free in ufx_ops_open() commit 5610bcfe8693c02e2e4c8b31427f1bdbdecc839c upstream. A race condition may occur if the user physically removes the USB device while calling open() for this device node. This is a race condition between the ufx_ops_open() function and the ufx_usb_disconnect() function, which may eventually result in UAF. So, add a mutex to the ufx_ops_open() and ufx_usb_disconnect() functions to avoid race condition of krefs.
Signed-off-by: Hyunwoo Kim Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/smscufx.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c index 28768c272b73..7673db5da26b 100644 --- a/drivers/video/fbdev/smscufx.c +++ b/drivers/video/fbdev/smscufx.c @@ -137,6 +137,8 @@ static int ufx_submit_urb(struct ufx_data *dev, struct urb * urb, size_t len); static int ufx_alloc_urb_list(struct ufx_data *dev, int count, size_t size); static void ufx_free_urb_list(struct ufx_data *dev); +static DEFINE_MUTEX(disconnect_mutex); + /* reads a control register */ static int ufx_reg_read(struct ufx_data *dev, u32 index, u32 *data) { @@ -1070,9 +1072,13 @@ static int ufx_ops_open(struct fb_info *info, int user) if (user == 0 && !console) return -EBUSY; + mutex_lock(&disconnect_mutex); + /* If the USB device is gone, we don't accept new opens */ - if (dev->virtualized) + if (dev->virtualized) { + mutex_unlock(&disconnect_mutex); return -ENODEV; + } dev->fb_count++; @@ -1096,6 +1102,8 @@ static int ufx_ops_open(struct fb_info *info, int user) pr_debug("open /dev/fb%d user=%d fb_info=%p count=%d", info->node, user, info, dev->fb_count); + mutex_unlock(&disconnect_mutex); + return 0; } @@ -1740,6 +1748,8 @@ static void ufx_usb_disconnect(struct usb_interface *interface) { struct ufx_data *dev; + mutex_lock(&disconnect_mutex); + dev = usb_get_intfdata(interface); pr_debug("USB disconnect starting\n"); @@ -1760,6 +1770,8 @@ static void ufx_usb_disconnect(struct usb_interface *interface) kref_put(&dev->kref, ufx_free); /* consider ufx_data freed */ + + mutex_unlock(&disconnect_mutex); } static struct usb_driver ufx_driver = { From 47b5ffe86332af95f0f52be0a63d4da7c2b37b55 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 23 Aug 2022 12:45:42 +0100 Subject: [PATCH 0048/1477] btrfs: fix race between quota enable and quota rescan ioctl 
commit 331cd9461412e103d07595a10289de90004ac890 upstream. When enabling quotas, at btrfs_quota_enable(), after committing the transaction, we change fs_info->quota_root to point to the quota root we created and set BTRFS_FS_QUOTA_ENABLED at fs_info->flags. Then we try to start the qgroup rescan worker, first by initializing it with a call to qgroup_rescan_init() - however if that fails we end up freeing the quota root but we leave fs_info->quota_root still pointing to it, this can later result in a use-after-free somewhere else. We have previously set the flags BTRFS_FS_QUOTA_ENABLED and BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with -EINPROGRESS at btrfs_quota_enable(), which is possible if someone already called the quota rescan ioctl, and therefore started the rescan worker. So fix this by ignoring an -EINPROGRESS and asserting we can't get any other error. Reported-by: Ye Bin Link: https://lore.kernel.org/linux-btrfs/20220823015931.421355-1-yebin10@huawei.com/ CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a02e38fb696c..36da77534076 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1158,6 +1158,21 @@ out_add_root: fs_info->qgroup_rescan_running = true; btrfs_queue_work(fs_info->qgroup_rescan_workers, &fs_info->qgroup_rescan_work); + } else { + /* + * We have set both BTRFS_FS_QUOTA_ENABLED and + * BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with + * -EINPROGRESS. That can happen because someone started the + * rescan worker by calling quota rescan ioctl before we + * attempted to initialize the rescan worker. Failure due to + * quotas disabled in the meanwhile is not possible, because + * we are holding a write lock on fs_info->subvol_sem, which + * is also acquired when disabling quotas. 
+ * Ignore such error, and any other error would need to undo + * everything we did in the transaction we just committed. + */ + ASSERT(ret == -EINPROGRESS); + ret = 0; } out_free_path: From 12014eaf1b3fa601ff544585180307cbf83a8ee8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 23 Aug 2022 10:18:42 -0700 Subject: [PATCH 0049/1477] f2fs: increase the limit for reserve_root commit da35fe96d12d15779f3cb74929b7ed03941cf983 upstream. This patch increases the threshold that limits the reserved root space from 0.2% to 12.5% by using simple shift operation. Typically Android sets 128MB, but if the storage capacity is 32GB, 0.2% which is around 64MB becomes too small. Let's relax it. Cc: stable@vger.kernel.org Reported-by: Aran Dalton Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ccfb6c5a8fbc..fba413ced982 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -267,10 +267,10 @@ static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb, static inline void limit_reserve_root(struct f2fs_sb_info *sbi) { - block_t limit = min((sbi->user_block_count << 1) / 1000, + block_t limit = min((sbi->user_block_count >> 3), sbi->user_block_count - sbi->reserved_blocks); - /* limit is 0.2% */ + /* limit is 12.5% */ if (test_opt(sbi, RESERVE_ROOT) && F2FS_OPTION(sbi).root_reserved_blocks > limit) { F2FS_OPTION(sbi).root_reserved_blocks = limit; From 73fb4bd2c055a393816f078f158cdd3025006f1d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Sep 2022 10:08:41 +0800 Subject: [PATCH 0050/1477] f2fs: fix to do sanity check on destination blkaddr during recovery commit 0ef4ca04a3f9223ff8bc440041c524b2123e09a3 upstream. 
As Wenqing Liu reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216456 loop5: detected capacity change from 0 to 131072 F2FS-fs (loop5): recover_inode: ino = 6, name = hln, inline = 1 F2FS-fs (loop5): recover_data: ino = 6 (i_size: recover) err = 0 F2FS-fs (loop5): recover_inode: ino = 6, name = hln, inline = 1 F2FS-fs (loop5): recover_data: ino = 6 (i_size: recover) err = 0 F2FS-fs (loop5): recover_inode: ino = 6, name = hln, inline = 1 F2FS-fs (loop5): recover_data: ino = 6 (i_size: recover) err = 0 F2FS-fs (loop5): Bitmap was wrongly set, blk:5634 ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1013 at fs/f2fs/segment.c:2198 RIP: 0010:update_sit_entry+0xa55/0x10b0 [f2fs] Call Trace: f2fs_do_replace_block+0xa98/0x1890 [f2fs] f2fs_replace_block+0xeb/0x180 [f2fs] recover_data+0x1a69/0x6ae0 [f2fs] f2fs_recover_fsync_data+0x120d/0x1fc0 [f2fs] f2fs_fill_super+0x4665/0x61e0 [f2fs] mount_bdev+0x2cf/0x3b0 legacy_get_tree+0xed/0x1d0 vfs_get_tree+0x81/0x2b0 path_mount+0x47e/0x19d0 do_mount+0xce/0xf0 __x64_sys_mount+0x12c/0x1a0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd If we enable CONFIG_F2FS_CHECK_FS config, it will trigger a kernel panic instead of warning. The root cause is: in fuzzed image, SIT table is inconsistent with inode mapping table, result in triggering such warning during SIT table update. This patch introduces a new flag DATA_GENERIC_ENHANCE_UPDATE, w/ this flag, data block recovery flow can check destination blkaddr's validation in SIT table, and skip f2fs_replace_block() to avoid inconsistent status. 
Cc: stable@vger.kernel.org Reported-by: Wenqing Liu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/checkpoint.c | 10 +++++++++- fs/f2fs/f2fs.h | 4 ++++ fs/f2fs/recovery.c | 8 ++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 1c49b9959b32..0653c54873b5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -136,7 +136,7 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, unsigned int segno, offset; bool exist; - if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ) + if (type == DATA_GENERIC) return true; segno = GET_SEGNO(sbi, blkaddr); @@ -144,6 +144,13 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, se = get_seg_entry(sbi, segno); exist = f2fs_test_bit(offset, se->cur_valid_map); + if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) { + f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", + blkaddr, exist); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return exist; + } + if (!exist && type == DATA_GENERIC_ENHANCE) { f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", blkaddr, exist); @@ -181,6 +188,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case DATA_GENERIC: case DATA_GENERIC_ENHANCE: case DATA_GENERIC_ENHANCE_READ: + case DATA_GENERIC_ENHANCE_UPDATE: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) { f2fs_warn(sbi, "access invalid blkaddr:%u", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1066725c3c5d..dbe9fcef07e3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -235,6 +235,10 @@ enum { * condition of read on truncated area * by extent_cache */ + DATA_GENERIC_ENHANCE_UPDATE, /* + * strong check on range and segment + * bitmap for update case + */ META_GENERIC, }; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 72ce13111679..865e0c237d23 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ 
-661,6 +661,14 @@ retry_prev: goto err; } + if (f2fs_is_valid_blkaddr(sbi, dest, + DATA_GENERIC_ENHANCE_UPDATE)) { + f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u", + dest, inode->i_ino, dn.ofs_in_node); + err = -EFSCORRUPTED; + goto err; + } + /* write dummy data page */ f2fs_replace_block(sbi, &dn, src, dest, ni.version, false, false); From 4a8e8bf280703e04e0b9d91f101e1fdd9a5bd09e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 14 Sep 2022 19:51:51 +0800 Subject: [PATCH 0051/1477] f2fs: fix to do sanity check on summary info commit c6ad7fd16657ebd34a87a97d9588195aae87597d upstream. As Wenqing Liu reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216456 BUG: KASAN: use-after-free in recover_data+0x63ae/0x6ae0 [f2fs] Read of size 4 at addr ffff8881464dcd80 by task mount/1013 CPU: 3 PID: 1013 Comm: mount Tainted: G W 6.0.0-rc4 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 Call Trace: dump_stack_lvl+0x45/0x5e print_report.cold+0xf3/0x68d kasan_report+0xa8/0x130 recover_data+0x63ae/0x6ae0 [f2fs] f2fs_recover_fsync_data+0x120d/0x1fc0 [f2fs] f2fs_fill_super+0x4665/0x61e0 [f2fs] mount_bdev+0x2cf/0x3b0 legacy_get_tree+0xed/0x1d0 vfs_get_tree+0x81/0x2b0 path_mount+0x47e/0x19d0 do_mount+0xce/0xf0 __x64_sys_mount+0x12c/0x1a0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd The root cause is: in fuzzed image, SSA table is corrupted: ofs_in_node is larger than ADDRS_PER_PAGE(), result in out-of-range access on 4k-size page. - recover_data - do_recover_data - check_index_in_prev_nodes - f2fs_data_blkaddr This patch adds sanity check on summary info in recovery and GC flow in where the flows rely on them. 
After patch: [ 29.310883] F2FS-fs (loop0): Inconsistent ofs_in_node:65286 in summary, ino:0, nid:6, max:1018 Cc: stable@vger.kernel.org Reported-by: Wenqing Liu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/gc.c | 10 +++++++++- fs/f2fs/recovery.c | 15 ++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3b53fdebf03d..3baa62ef6e3a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -977,7 +977,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, { struct page *node_page; nid_t nid; - unsigned int ofs_in_node; + unsigned int ofs_in_node, max_addrs; block_t source_blkaddr; nid = le32_to_cpu(sum->nid); @@ -1003,6 +1003,14 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return false; } + max_addrs = IS_INODE(node_page) ? DEF_ADDRS_PER_INODE : + DEF_ADDRS_PER_BLOCK; + if (ofs_in_node >= max_addrs) { + f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u", + ofs_in_node, dni->ino, dni->nid, max_addrs); + return false; + } + *nofs = ofs_of_node(node_page); source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); f2fs_put_page(node_page, 1); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 865e0c237d23..c3c527afdd07 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -437,7 +437,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, struct dnode_of_data tdn = *dn; nid_t ino, nid; struct inode *inode; - unsigned int offset; + unsigned int offset, ofs_in_node, max_addrs; block_t bidx; int i; @@ -463,15 +463,24 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, got_it: /* Use the locked dnode page and inode */ nid = le32_to_cpu(sum.nid); + ofs_in_node = le16_to_cpu(sum.ofs_in_node); + + max_addrs = ADDRS_PER_PAGE(dn->node_page, dn->inode); + if (ofs_in_node >= max_addrs) { + f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, 
max:%u", + ofs_in_node, dn->inode->i_ino, nid, max_addrs); + return -EFSCORRUPTED; + } + if (dn->inode->i_ino == nid) { tdn.nid = nid; if (!dn->inode_page_locked) lock_page(dn->inode_page); tdn.node_page = dn->inode_page; - tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + tdn.ofs_in_node = ofs_in_node; goto truncate_out; } else if (dn->nid == nid) { - tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + tdn.ofs_in_node = ofs_in_node; goto truncate_out; } From d621a87064fa5e58845855cc505cb2039c3c2693 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 20 Jul 2021 14:54:17 -0700 Subject: [PATCH 0052/1477] hardening: Clarify Kconfig text for auto-var-init commit dcb7c0b9461c2a30f6616262736daac6f01ecb09 upstream. Clarify the details around the automatic variable initialization modes available. Specifically this details the values used for pattern init and expands on the rationale for zero init safety. Additionally makes zero init the default when available. Cc: glider@google.com Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: linux-security-module@vger.kernel.org Cc: clang-built-linux@googlegroups.com Signed-off-by: Kees Cook Acked-by: Gustavo A. R. Silva Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- security/Kconfig.hardening | 50 +++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 269967c4fc1b..45c688488172 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -29,6 +29,7 @@ choice prompt "Initialize kernel stack variables at function entry" default GCC_PLUGIN_STRUCTLEAK_BYREF_ALL if COMPILE_TEST && GCC_PLUGINS default INIT_STACK_ALL_PATTERN if COMPILE_TEST && CC_HAS_AUTO_VAR_INIT_PATTERN + default INIT_STACK_ALL_ZERO if CC_HAS_AUTO_VAR_INIT_PATTERN default INIT_STACK_NONE help This option enables initialization of stack variables at @@ -39,11 +40,11 @@ choice syscalls. 
This chooses the level of coverage over classes of potentially - uninitialized variables. The selected class will be + uninitialized variables. The selected class of variable will be initialized before use in a function. config INIT_STACK_NONE - bool "no automatic initialization (weakest)" + bool "no automatic stack variable initialization (weakest)" help Disable automatic stack variable initialization. This leaves the kernel vulnerable to the standard @@ -80,7 +81,7 @@ choice and is disallowed. config GCC_PLUGIN_STRUCTLEAK_BYREF_ALL - bool "zero-init anything passed by reference (very strong)" + bool "zero-init everything passed by reference (very strong)" depends on GCC_PLUGINS depends on !(KASAN && KASAN_STACK=1) select GCC_PLUGIN_STRUCTLEAK @@ -91,33 +92,44 @@ choice of uninitialized stack variable exploits and information exposures. + As a side-effect, this keeps a lot of variables on the + stack that can otherwise be optimized out, so combining + this with CONFIG_KASAN_STACK can lead to a stack overflow + and is disallowed. + config INIT_STACK_ALL_PATTERN - bool "0xAA-init everything on the stack (strongest)" + bool "pattern-init everything (strongest)" depends on CC_HAS_AUTO_VAR_INIT_PATTERN help - Initializes everything on the stack with a 0xAA - pattern. This is intended to eliminate all classes - of uninitialized stack variable exploits and information - exposures, even variables that were warned to have been - left uninitialized. + Initializes everything on the stack (including padding) + with a specific debug value. This is intended to eliminate + all classes of uninitialized stack variable exploits and + information exposures, even variables that were warned about + having been left uninitialized. Pattern initialization is known to provoke many existing bugs related to uninitialized locals, e.g. pointers receive - non-NULL values, buffer sizes and indices are very big. + non-NULL values, buffer sizes and indices are very big. 
The + pattern is situation-specific; Clang on 64-bit uses 0xAA + repeating for all types and padding except float and double + which use 0xFF repeating (-NaN). Clang on 32-bit uses 0xFF + repeating for all types and padding. config INIT_STACK_ALL_ZERO - bool "zero-init everything on the stack (strongest and safest)" + bool "zero-init everything (strongest and safest)" depends on CC_HAS_AUTO_VAR_INIT_ZERO help - Initializes everything on the stack with a zero - value. This is intended to eliminate all classes - of uninitialized stack variable exploits and information - exposures, even variables that were warned to have been - left uninitialized. + Initializes everything on the stack (including padding) + with a zero value. This is intended to eliminate all + classes of uninitialized stack variable exploits and + information exposures, even variables that were warned + about having been left uninitialized. - Zero initialization provides safe defaults for strings, - pointers, indices and sizes, and is therefore - more suitable as a security mitigation measure. + Zero initialization provides safe defaults for strings + (immediately NUL-terminated), pointers (NULL), indices + (index 0), and sizes (0 length), so it is therefore more + suitable as a production security mitigation than pattern + initialization. endchoice From bdcb1d7cf285a024dedef2863b661517a87c72cc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Sep 2021 12:49:03 -0700 Subject: [PATCH 0053/1477] hardening: Avoid harmless Clang option under CONFIG_INIT_STACK_ALL_ZERO commit f02003c860d921171be4a27e2893766eb3bc6871 upstream. Currently under Clang, CC_HAS_AUTO_VAR_INIT_ZERO requires an extra -enable flag compared to CC_HAS_AUTO_VAR_INIT_PATTERN. GCC 12[1] will not, and will happily ignore the Clang-specific flag. However, its presence on the command-line is both cumbersome and confusing. 
Due to GCC's tolerant behavior, though, we can continue to use a single Kconfig cc-option test for the feature on both compilers, but then drop the Clang-specific option in the Makefile. In other words, this patch does not change anything other than making the compiler command line shorter once GCC supports -ftrivial-auto-var-init=zero. [1] https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=a25e0b5e6ac8a77a71c229e0a7b744603365b0e9 Cc: Greg Kroah-Hartman Cc: Masahiro Yamada Cc: llvm@lists.linux.dev Fixes: dcb7c0b9461c ("hardening: Clarify Kconfig text for auto-var-init") Suggested-by: Will Deacon Link: https://lore.kernel.org/lkml/20210914102837.6172-1-will@kernel.org/ Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Acked-by: Will Deacon Signed-off-by: Kees Cook Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 6 +++--- security/Kconfig.hardening | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index b824bdb0457c..858391c31631 100644 --- a/Makefile +++ b/Makefile @@ -816,12 +816,12 @@ endif # Initialize all stack variables with a zero value. ifdef CONFIG_INIT_STACK_ALL_ZERO -# Future support for zero initialization is still being debated, see -# https://bugs.llvm.org/show_bug.cgi?id=45497. These flags are subject to being -# renamed or dropped. 
KBUILD_CFLAGS += -ftrivial-auto-var-init=zero +ifdef CONFIG_CC_IS_CLANG +# https://bugs.llvm.org/show_bug.cgi?id=45497 KBUILD_CFLAGS += -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang endif +endif DEBUG_CFLAGS := diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 45c688488172..81368ce57d67 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -23,13 +23,16 @@ config CC_HAS_AUTO_VAR_INIT_PATTERN def_bool $(cc-option,-ftrivial-auto-var-init=pattern) config CC_HAS_AUTO_VAR_INIT_ZERO + # GCC ignores the -enable flag, so we can test for the feature with + # a single invocation using the flag, but drop it as appropriate in + # the Makefile, depending on the presence of Clang. def_bool $(cc-option,-ftrivial-auto-var-init=zero -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang) choice prompt "Initialize kernel stack variables at function entry" default GCC_PLUGIN_STRUCTLEAK_BYREF_ALL if COMPILE_TEST && GCC_PLUGINS default INIT_STACK_ALL_PATTERN if COMPILE_TEST && CC_HAS_AUTO_VAR_INIT_PATTERN - default INIT_STACK_ALL_ZERO if CC_HAS_AUTO_VAR_INIT_PATTERN + default INIT_STACK_ALL_ZERO if CC_HAS_AUTO_VAR_INIT_ZERO default INIT_STACK_NONE help This option enables initialization of stack variables at From ba52e685d29b942f436ce6d51e787510ed93ef5a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 29 Sep 2022 22:57:43 -0700 Subject: [PATCH 0054/1477] hardening: Remove Clang's enable flag for -ftrivial-auto-var-init=zero commit 607e57c6c62c00965ae276902c166834ce73014a upstream. Now that Clang's -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang option is no longer required, remove it from the command line. Clang 16 and later will warn when it is used, which will cause Kconfig to think it can't use -ftrivial-auto-var-init=zero at all. Check for whether it is required and only use it when so. 
Cc: Nathan Chancellor Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: linux-kbuild@vger.kernel.org Cc: llvm@lists.linux.dev Cc: stable@vger.kernel.org Fixes: f02003c860d9 ("hardening: Avoid harmless Clang option under CONFIG_INIT_STACK_ALL_ZERO") Signed-off-by: Kees Cook Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 4 ++-- security/Kconfig.hardening | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 858391c31631..2af6f1e68a09 100644 --- a/Makefile +++ b/Makefile @@ -817,8 +817,8 @@ endif # Initialize all stack variables with a zero value. ifdef CONFIG_INIT_STACK_ALL_ZERO KBUILD_CFLAGS += -ftrivial-auto-var-init=zero -ifdef CONFIG_CC_IS_CLANG -# https://bugs.llvm.org/show_bug.cgi?id=45497 +ifdef CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_ENABLER +# https://github.com/llvm/llvm-project/issues/44842 KBUILD_CFLAGS += -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang endif endif diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 81368ce57d67..b54eb7177a31 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -22,11 +22,17 @@ menu "Memory initialization" config CC_HAS_AUTO_VAR_INIT_PATTERN def_bool $(cc-option,-ftrivial-auto-var-init=pattern) -config CC_HAS_AUTO_VAR_INIT_ZERO - # GCC ignores the -enable flag, so we can test for the feature with - # a single invocation using the flag, but drop it as appropriate in - # the Makefile, depending on the presence of Clang. +config CC_HAS_AUTO_VAR_INIT_ZERO_BARE + def_bool $(cc-option,-ftrivial-auto-var-init=zero) + +config CC_HAS_AUTO_VAR_INIT_ZERO_ENABLER + # Clang 16 and later warn about using the -enable flag, but it + # is required before then. 
def_bool $(cc-option,-ftrivial-auto-var-init=zero -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang) + depends on !CC_HAS_AUTO_VAR_INIT_ZERO_BARE + +config CC_HAS_AUTO_VAR_INIT_ZERO + def_bool CC_HAS_AUTO_VAR_INIT_ZERO_BARE || CC_HAS_AUTO_VAR_INIT_ZERO_ENABLER choice prompt "Initialize kernel stack variables at function entry" From eea3e455a3ae60fd3da1aeffb161123b8c15535b Mon Sep 17 00:00:00 2001 From: Andrew Perepechko Date: Wed, 7 Sep 2022 19:59:59 +0300 Subject: [PATCH 0055/1477] jbd2: wake up journal waiters in FIFO order, not LIFO commit 34fc8768ec6089565d6d73bad26724083cecf7bd upstream. LIFO wakeup order is unfair and sometimes leads to a journal user not being able to get a journal handle for hundreds of transactions in a row. FIFO wakeup can make things more fair. Cc: stable@kernel.org Signed-off-by: Alexey Lyashkov Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220907165959.1137482-1-alexey.lyashkov@gmail.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/commit.c | 2 +- fs/jbd2/transaction.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 98cfa73cb165..fa24b407a9dc 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -581,7 +581,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) journal->j_running_transaction = NULL; start_time = ktime_get(); commit_transaction->t_log_start = journal->j_head; - wake_up(&journal->j_wait_transaction_locked); + wake_up_all(&journal->j_wait_transaction_locked); write_unlock(&journal->j_state_lock); jbd_debug(3, "JBD2: commit phase 2a\n"); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 0f1cef90fa7d..86472212cce1 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -173,7 +173,7 @@ static void wait_transaction_locked(journal_t *journal) int need_to_start; tid_t tid = journal->j_running_transaction->t_tid; - 
prepare_to_wait(&journal->j_wait_transaction_locked, &wait, + prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); @@ -199,7 +199,7 @@ static void wait_transaction_switching(journal_t *journal) read_unlock(&journal->j_state_lock); return; } - prepare_to_wait(&journal->j_wait_transaction_locked, &wait, + prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); read_unlock(&journal->j_state_lock); /* @@ -894,7 +894,7 @@ void jbd2_journal_unlock_updates (journal_t *journal) write_lock(&journal->j_state_lock); --journal->j_barrier_count; write_unlock(&journal->j_state_lock); - wake_up(&journal->j_wait_transaction_locked); + wake_up_all(&journal->j_wait_transaction_locked); } static void warn_dirty_buffer(struct buffer_head *bh) From 7a33dde572fceb45d02d188e0213c47059401c93 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 14 Sep 2022 18:08:11 +0800 Subject: [PATCH 0056/1477] jbd2: fix potential buffer head reference count leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e0d5fc7a6d80ac2406c7dfc6bb625201d0250a8a upstream. In 'jbd2_fc_wait_bufs', if a buffer isn't uptodate, the function returns -EIO without updating 'journal->j_fc_off'. But 'jbd2_fc_release_bufs' releases buffer heads starting from ‘j_fc_off - 1’ and terminates the release as soon as 'bh' is NULL, which leads to a buffer head reference count leak. To solve the above issue, update 'journal->j_fc_off' before returning -EIO.
Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220914100812.1414768-2-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/journal.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b748329bb0ba..7c443ee5b472 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -926,8 +926,14 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks) wait_on_buffer(bh); put_bh(bh); journal->j_fc_wbuf[i] = NULL; - if (unlikely(!buffer_uptodate(bh))) + /* + * Update j_fc_off so jbd2_fc_release_bufs can release remain + * buffer head. + */ + if (unlikely(!buffer_uptodate(bh))) { + journal->j_fc_off = i; return -EIO; + } } return 0; From 1d4d16daec2a6689b6d3fbfc7d2078643adc6619 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 14 Sep 2022 18:08:12 +0800 Subject: [PATCH 0057/1477] jbd2: fix potential use-after-free in jbd2_fc_wait_bufs commit 243d1a5d505d0b0460c9af0ad56ed4a56ef0bebd upstream. In 'jbd2_fc_wait_bufs' use 'bh' after put buffer head reference count which may lead to use-after-free. So judge buffer if uptodate before put buffer head reference count. Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220914100812.1414768-3-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/journal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 7c443ee5b472..6689d235de8a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -924,16 +924,16 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks) for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) { bh = journal->j_fc_wbuf[i]; wait_on_buffer(bh); - put_bh(bh); - journal->j_fc_wbuf[i] = NULL; /* * Update j_fc_off so jbd2_fc_release_bufs can release remain * buffer head. 
*/ if (unlikely(!buffer_uptodate(bh))) { - journal->j_fc_off = i; + journal->j_fc_off = i + 1; return -EIO; } + put_bh(bh); + journal->j_fc_wbuf[i] = NULL; } return 0; From e65506ff181fc176088f32117d69b9cb1ddda777 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 17 Sep 2022 17:38:05 +0800 Subject: [PATCH 0058/1477] jbd2: add miss release buffer head in fc_do_one_pass() commit dfff66f30f66b9524b661f311bbed8ff3d2ca49f upstream. In fc_do_one_pass() miss release buffer head after use which will lead to reference count leak. Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220917093805.1782845-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/recovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 1e07dfac4d81..1ae1697fe99b 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -256,6 +256,7 @@ static int fc_do_one_pass(journal_t *journal, err = journal->j_fc_replay_callback(journal, bh, pass, next_fc_block - journal->j_fc_first, expected_commit_id); + brelse(bh); next_fc_block++; if (err < 0 || err == JBD2_FC_REPLAY_STOP) break; From fb98cb61efff3b2a1964939465ccaaf906af1d4f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 27 Jul 2022 17:57:53 +0200 Subject: [PATCH 0059/1477] ext4: avoid crash when inline data creation follows DIO write commit 4bb26f2885ac6930984ee451b952c5a6042f2c0e upstream. When inode is created and written to using direct IO, there is nothing to clear the EXT4_STATE_MAY_INLINE_DATA flag. Thus when inode gets truncated later to say 1 byte and written using normal write, we will try to store the data as inline data. This confuses the code later because the inode now has both normal block and inline data allocated and the confusion manifests for example as: kernel BUG at fs/ext4/inode.c:2721! 
invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 359 Comm: repro Not tainted 5.19.0-rc8-00001-g31ba1e3b8305-dirty #15 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014 RIP: 0010:ext4_writepages+0x363d/0x3660 RSP: 0018:ffffc90000ccf260 EFLAGS: 00010293 RAX: ffffffff81e1abcd RBX: 0000008000000000 RCX: ffff88810842a180 RDX: 0000000000000000 RSI: 0000008000000000 RDI: 0000000000000000 RBP: ffffc90000ccf650 R08: ffffffff81e17d58 R09: ffffed10222c680b R10: dfffe910222c680c R11: 1ffff110222c680a R12: ffff888111634128 R13: ffffc90000ccf880 R14: 0000008410000000 R15: 0000000000000001 FS: 00007f72635d2640(0000) GS:ffff88811b000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000565243379180 CR3: 000000010aa74000 CR4: 0000000000150eb0 Call Trace: do_writepages+0x397/0x640 filemap_fdatawrite_wbc+0x151/0x1b0 file_write_and_wait_range+0x1c9/0x2b0 ext4_sync_file+0x19e/0xa00 vfs_fsync_range+0x17b/0x190 ext4_buffered_write_iter+0x488/0x530 ext4_file_write_iter+0x449/0x1b90 vfs_write+0xbcd/0xf40 ksys_write+0x198/0x2c0 __x64_sys_write+0x7b/0x90 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fix the problem by clearing EXT4_STATE_MAY_INLINE_DATA when we are doing direct IO write to a file. 
Cc: stable@kernel.org Reported-by: Tadeusz Struk Reported-by: syzbot+bd13648a53ed6933ca49@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=a1e89d09bbbcbd5c4cb45db230ee28c822953984 Signed-off-by: Jan Kara Reviewed-by: Lukas Czerner Tested-by: Tadeusz Struk Link: https://lore.kernel.org/r/20220727155753.13969-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 7b28d44b0ddd..0f61e0aa85d6 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -529,6 +529,12 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = -EAGAIN; goto out; } + /* + * Make sure inline data cannot be created anymore since we are going + * to allocate blocks for DIO. We know the inode does not have any + * inline data now because ext4_dio_supported() checked for that. + */ + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); offset = iocb->ki_pos; count = ret; From f34ab95162763cd7352f46df169296eec28b688d Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 5 Aug 2022 20:39:47 +0800 Subject: [PATCH 0060/1477] ext4: fix null-ptr-deref in ext4_write_info commit f9c1f248607d5546075d3f731e7607d5571f2b60 upstream. I caught a null-ptr-deref bug as follows: ================================================================== KASAN: null-ptr-deref in range [0x0000000000000068-0x000000000000006f] CPU: 1 PID: 1589 Comm: umount Not tainted 5.10.0-02219-dirty #339 RIP: 0010:ext4_write_info+0x53/0x1b0 [...] 
Call Trace: dquot_writeback_dquots+0x341/0x9a0 ext4_sync_fs+0x19e/0x800 __sync_filesystem+0x83/0x100 sync_filesystem+0x89/0xf0 generic_shutdown_super+0x79/0x3e0 kill_block_super+0xa1/0x110 deactivate_locked_super+0xac/0x130 deactivate_super+0xb6/0xd0 cleanup_mnt+0x289/0x400 __cleanup_mnt+0x16/0x20 task_work_run+0x11c/0x1c0 exit_to_user_mode_prepare+0x203/0x210 syscall_exit_to_user_mode+0x5b/0x3a0 do_syscall_64+0x59/0x70 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ================================================================== Above issue may happen as follows: ------------------------------------- exit_to_user_mode_prepare task_work_run __cleanup_mnt cleanup_mnt deactivate_super deactivate_locked_super kill_block_super generic_shutdown_super shrink_dcache_for_umount dentry = sb->s_root sb->s_root = NULL <--- Here set NULL sync_filesystem __sync_filesystem sb->s_op->sync_fs > ext4_sync_fs dquot_writeback_dquots sb->dq_op->write_info > ext4_write_info ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2) d_inode(sb->s_root) s_root->d_inode <--- Null pointer dereference To solve this problem, we use ext4_journal_start_sb directly to avoid s_root being used. 
Cc: stable@kernel.org Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220805123947.565152-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a0af833f7da7..1a12b91bcc6c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6273,7 +6273,7 @@ static int ext4_write_info(struct super_block *sb, int type) handle_t *handle; /* Data block + inode block */ - handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2); + handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit_info(sb, type); From ac66db1a436504159463f811b9e9864c1d2b03f1 Mon Sep 17 00:00:00 2001 From: Lalith Rajendran Date: Thu, 18 Aug 2022 21:40:49 +0000 Subject: [PATCH 0061/1477] ext4: make ext4_lazyinit_thread freezable commit 3b575495ab8dbb4dbe85b4ac7f991693c3668ff5 upstream. ext4_lazyinit_thread is not set freezable. Hence when the thread calls try_to_freeze it doesn't freeze during suspend and continues to send requests to the storage during suspend, resulting in suspend failures. 
Cc: stable@kernel.org Signed-off-by: Lalith Rajendran Link: https://lore.kernel.org/r/20220818214049.1519544-1-lalithkraj@google.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1a12b91bcc6c..fbdce7f105f3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3550,6 +3550,7 @@ static int ext4_lazyinit_thread(void *arg) unsigned long next_wakeup, cur; BUG_ON(NULL == eli); + set_freezable(); cont_thread: while (true) { From 483831ad0440f62c10d1707c97ce824bd82d98ae Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Aug 2022 13:48:32 +0200 Subject: [PATCH 0062/1477] ext4: fix check for block being out of directory size commit 61a1d87a324ad5e3ed27c6699dfc93218fcf3201 upstream. The check in __ext4_read_dirblock() for block being outside of directory size was wrong because it compared block number against directory size in bytes. Fix it. Fixes: 65f8ea4cd57d ("ext4: check if directory block is within i_size") CVE: CVE-2022-1184 CC: stable@vger.kernel.org Signed-off-by: Jan Kara Reviewed-by: Lukas Czerner Link: https://lore.kernel.org/r/20220822114832.1482-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 58b0f1b12095..646cc1935dff 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -125,7 +125,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, struct ext4_dir_entry *dirent; int is_dx_block = 0; - if (block >= inode->i_size) { + if (block >= inode->i_size >> inode->i_blkbits) { ext4_error_inode(inode, func, line, block, "Attempting to read directory block (%u) that is past i_size (%llu)", block, inode->i_size); From 0e1764ad71abca735418fd596a82067074b59687 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 24 Aug 2022 18:03:47 +0200 Subject: [PATCH 0063/1477] ext4: 
don't increase iversion counter for ea_inodes commit 50f094a5580e6297bf10a807d16f0ee23fa576cf upstream. ea_inodes are using i_version for storing part of the reference count so we really need to leave it alone. The problem can be reproduced by xfstest ext4/026 when iversion is enabled. Fix it by not calling inode_inc_iversion() for EXT4_EA_INODE_FL inodes in ext4_mark_iloc_dirty(). Cc: stable@kernel.org Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Reviewed-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20220824160349.39664-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 44b6d061ed71..867c4d52bb78 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5769,7 +5769,12 @@ int ext4_mark_iloc_dirty(handle_t *handle, } ext4_fc_track_inode(handle, inode); - if (IS_I_VERSION(inode)) + /* + * ea_inodes are using i_version for storing reference count, don't + * mess with it + */ + if (IS_I_VERSION(inode) && + !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) inode_inc_iversion(inode); /* the do_update_inode consumes one bh->b_count */ From fbb0e601bd51da9ccf88f548279d1745107c4ea2 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 31 Aug 2022 15:46:29 +0800 Subject: [PATCH 0064/1477] ext4: ext4_read_bh_lock() should submit IO if the buffer isn't uptodate commit 0b73284c564d3ae4feef4bc920292f004acf4980 upstream. Recently we noticed that the ext4 filesystem would occasionally fail to read metadata from disk and report an error message, even though the disk and block layer look fine. After analysis, we locked onto commit 88dbcbb3a484 ("blkdev: avoid migration stalls for blkdev pages").
It provide a migration method for the bdev, we could move page that has buffers without extra users now, but it lock the buffers on the page, which breaks the fragile metadata read operation on ext4 filesystem, ext4_read_bh_lock() was copied from ll_rw_block(), it depends on the assumption of that locked buffer means it is under IO. So it just trylock the buffer and skip submit IO if it lock failed, after wait_on_buffer() we conclude IO error because the buffer is not uptodate. This issue could be easily reproduced by add some delay just after buffer_migrate_lock_buffers() in __buffer_migrate_folio() and do fsstress on ext4 filesystem. EXT4-fs error (device pmem1): __ext4_find_entry:1658: inode #73193: comm fsstress: reading directory lblock 0 EXT4-fs error (device pmem1): __ext4_find_entry:1658: inode #75334: comm fsstress: reading directory lblock 0 Fix it by removing the trylock logic in ext4_read_bh_lock(), just lock the buffer and submit IO if it's not uptodate, and also leave over readahead helper. 
Cc: stable@kernel.org Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220831074629.3755110-1-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fbdce7f105f3..9573d493c374 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -188,19 +188,12 @@ int ext4_read_bh(struct buffer_head *bh, int op_flags, bh_end_io_t *end_io) int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait) { - if (trylock_buffer(bh)) { - if (wait) - return ext4_read_bh(bh, op_flags, NULL); + lock_buffer(bh); + if (!wait) { ext4_read_bh_nowait(bh, op_flags, NULL); return 0; } - if (wait) { - wait_on_buffer(bh); - if (buffer_uptodate(bh)) - return 0; - return -EIO; - } - return 0; + return ext4_read_bh(bh, op_flags, NULL); } /* @@ -247,7 +240,8 @@ void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block) struct buffer_head *bh = sb_getblk_gfp(sb, block, 0); if (likely(bh)) { - ext4_read_bh_lock(bh, REQ_RAHEAD, false); + if (trylock_buffer(bh)) + ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL); brelse(bh); } } From 74d2a398d2d8c54d6468bc1e9da60ed9f3c4739f Mon Sep 17 00:00:00 2001 From: Jinke Han Date: Sat, 3 Sep 2022 09:24:29 +0800 Subject: [PATCH 0065/1477] ext4: place buffer head allocation before handle start commit d1052d236eddf6aa851434db1897b942e8db9921 upstream. In our product environment, we encountered jbd hangs waiting for handles to stop while several writers were doing memory reclaim for buffer head allocation in the delayed allocation write path. Ext4 does buffer head allocation while holding the transaction handle, which may be blocked for too long if the reclaim does not go smoothly. According to our bcc trace, the reclaim time in buffer head allocation can reach 258s and the jbd transaction commit also takes almost the same time meanwhile.
Except for these extreme cases, we often see delays of several seconds for cgroup memory reclaim on our servers. This is more likely to happen in a docker environment. One thing to note: buffer heads are allocated as often as pages, or more often when the block size is less than the page size. Just like page cache allocation, we should also place the buffer head allocation before starting the handle. Cc: stable@kernel.org Signed-off-by: Jinke Han Link: https://lore.kernel.org/r/20220903012429.22555-1-hanjinke.666@bytedance.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 867c4d52bb78..45f31dc1e66f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1175,6 +1175,13 @@ retry_grab: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; + /* + * The same as page allocation, we prealloc buffer heads before + * starting the handle. + */ + if (!page_has_buffers(page)) + create_empty_buffers(page, inode->i_sb->s_blocksize, 0); + unlock_page(page); retry_journal: From d575fb52c46686f169774e409382af4b5990b215 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 14 Sep 2022 18:08:59 +0800 Subject: [PATCH 0066/1477] ext4: fix miss release buffer head in ext4_fc_write_inode commit ccbf8eeb39f2ff00b54726a2b20b35d788c4ecb5 upstream. In 'ext4_fc_write_inode' function first call 'ext4_get_inode_loc' get 'iloc', after use it miss release 'iloc.bh'. So just release 'iloc.bh' before 'ext4_fc_write_inode' return.
Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220914100859.1415196-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fast_commit.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 501e60713010..35c1e60177f0 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -766,22 +766,25 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc) tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); + ret = -ECANCELED; dst = ext4_fc_reserve_space(inode->i_sb, sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); if (!dst) - return -ECANCELED; + goto err; if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) - return -ECANCELED; + goto err; dst += sizeof(tl); if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) - return -ECANCELED; + goto err; dst += sizeof(fc_inode); if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc), inode_len, crc)) - return -ECANCELED; - - return 0; + goto err; + ret = 0; +err: + brelse(iloc.bh); + return ret; } /* From c9ce7766dc4e88e624c62a68221a3bbe8f06e856 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 21 Sep 2022 14:40:38 +0800 Subject: [PATCH 0067/1477] ext4: fix potential memory leak in ext4_fc_record_modified_inode() commit 9305721a309fa1bd7c194e0d4a2335bf3b29dca4 upstream. As krealloc may return NULL, in this case 'state->fc_modified_inodes' may not be freed by krealloc, but 'state->fc_modified_inodes' already set NULL. Then will lead to 'state->fc_modified_inodes' memory leak. 
Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220921064040.3693255-2-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fast_commit.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 35c1e60177f0..9f6da86a5016 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1391,13 +1391,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) if (state->fc_modified_inodes[i] == ino) return 0; if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { - state->fc_modified_inodes = krealloc( - state->fc_modified_inodes, + int *fc_modified_inodes; + + fc_modified_inodes = krealloc(state->fc_modified_inodes, sizeof(int) * (state->fc_modified_inodes_size + EXT4_FC_REPLAY_REALLOC_INCREMENT), GFP_KERNEL); - if (!state->fc_modified_inodes) + if (!fc_modified_inodes) return -ENOMEM; + state->fc_modified_inodes = fc_modified_inodes; state->fc_modified_inodes_size += EXT4_FC_REPLAY_REALLOC_INCREMENT; } From 2cfb769d60a2a57eb3566765428b6131cd16dcfe Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 21 Sep 2022 14:40:39 +0800 Subject: [PATCH 0068/1477] ext4: fix potential memory leak in ext4_fc_record_regions() commit 7069d105c1f15c442b68af43f7fde784f3126739 upstream. As krealloc may return NULL, in this case 'state->fc_regions' may not be freed by krealloc, but 'state->fc_regions' already set NULL. Then will lead to 'state->fc_regions' memory leak. 
Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220921064040.3693255-3-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fast_commit.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 9f6da86a5016..44b192fdfb59 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1584,15 +1584,17 @@ int ext4_fc_record_regions(struct super_block *sb, int ino, if (replay && state->fc_regions_used != state->fc_regions_valid) state->fc_regions_used = state->fc_regions_valid; if (state->fc_regions_used == state->fc_regions_size) { + struct ext4_fc_alloc_region *fc_regions; + state->fc_regions_size += EXT4_FC_REPLAY_REALLOC_INCREMENT; - state->fc_regions = krealloc( - state->fc_regions, - state->fc_regions_size * - sizeof(struct ext4_fc_alloc_region), - GFP_KERNEL); - if (!state->fc_regions) + fc_regions = krealloc(state->fc_regions, + state->fc_regions_size * + sizeof(struct ext4_fc_alloc_region), + GFP_KERNEL); + if (!fc_regions) return -ENOMEM; + state->fc_regions = fc_regions; } region = &state->fc_regions[state->fc_regions_used++]; region->ino = ino; From 2189756eabbb5f8366b4302183dfa3e98bfed196 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 21 Sep 2022 14:40:40 +0800 Subject: [PATCH 0069/1477] ext4: update 'state->fc_regions_size' after successful memory allocation commit 27cd49780381c6ccbf248798e5e8fd076200ffba upstream. To avoid a mismatch between 'state->fc_regions_size' and 'state->fc_regions' when reallocating 'fc_regions' fails, only update 'state->fc_regions_size' after 'state->fc_regions' is allocated successfully.
Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220921064040.3693255-4-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fast_commit.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 44b192fdfb59..41dcf21558c4 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1586,14 +1586,15 @@ int ext4_fc_record_regions(struct super_block *sb, int ino, if (state->fc_regions_used == state->fc_regions_size) { struct ext4_fc_alloc_region *fc_regions; - state->fc_regions_size += - EXT4_FC_REPLAY_REALLOC_INCREMENT; fc_regions = krealloc(state->fc_regions, - state->fc_regions_size * - sizeof(struct ext4_fc_alloc_region), + sizeof(struct ext4_fc_alloc_region) * + (state->fc_regions_size + + EXT4_FC_REPLAY_REALLOC_INCREMENT), GFP_KERNEL); if (!fc_regions) return -ENOMEM; + state->fc_regions_size += + EXT4_FC_REPLAY_REALLOC_INCREMENT; state->fc_regions = fc_regions; } region = &state->fc_regions[state->fc_regions_used++]; From 846f041203b9d205890eb3aa14b699f40fff5e2d Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 8 Aug 2022 15:00:19 -0400 Subject: [PATCH 0070/1477] livepatch: fix race between fork and KLP transition commit 747f7a2901174c9afa805dddfb7b24db6f65e985 upstream. The KLP transition code depends on the TIF_PATCH_PENDING and the task->patch_state to stay in sync. On a normal (forward) transition, TIF_PATCH_PENDING will be set on every task in the system, while on a reverse transition (after a failed forward one) first TIF_PATCH_PENDING will be cleared from every task, followed by it being set on tasks that need to be transitioned back to the original code. However, the fork code copies over the TIF_PATCH_PENDING flag from the parent to the child early on, in dup_task_struct and setup_thread_stack. Much later, klp_copy_process will set child->patch_state to match that of the parent. 
However, the parent's patch_state may have been changed by KLP loading or unloading since it was initially copied over into the child. This results in the KLP code occasionally hitting this warning in klp_complete_transition: for_each_process_thread(g, task) { WARN_ON_ONCE(test_tsk_thread_flag(task, TIF_PATCH_PENDING)); task->patch_state = KLP_UNDEFINED; } Set, or clear, the TIF_PATCH_PENDING flag in the child task depending on whether or not it is needed at the time klp_copy_process is called, at a point in copy_process where the tasklist_lock is held exclusively, preventing races with the KLP code. The KLP code does have a few places where the state is changed without the tasklist_lock held, but those should not cause problems because klp_update_patch_state(current) cannot be called while the current task is in the middle of fork, klp_check_and_switch_task() which is called under the pi_lock, which prevents rescheduling, and manipulation of the patch state of idle tasks, which do not fork. This should prevent this warning from triggering again in the future, and close the race for both normal and reverse transitions. 
Signed-off-by: Rik van Riel Reported-by: Breno Leitao Reviewed-by: Petr Mladek Acked-by: Josh Poimboeuf Fixes: d83a7cb375ee ("livepatch: change to a per-task consistency model") Cc: stable@kernel.org Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220808150019.03d6a67b@imladris.surriel.com Signed-off-by: Greg Kroah-Hartman --- kernel/livepatch/transition.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index f6310f848f34..b04b87a4e0a7 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -611,9 +611,23 @@ void klp_reverse_transition(void) /* Called from copy_process() during fork */ void klp_copy_process(struct task_struct *child) { - child->patch_state = current->patch_state; - /* TIF_PATCH_PENDING gets copied in setup_thread_stack() */ + /* + * The parent process may have gone through a KLP transition since + * the thread flag was copied in setup_thread_stack earlier. Bring + * the task flag up to date with the parent here. + * + * The operation is serialized against all klp_*_transition() + * operations by the tasklist_lock. The only exception is + * klp_update_patch_state(current), but we cannot race with + * that because we are current. + */ + if (test_tsk_thread_flag(current, TIF_PATCH_PENDING)) + set_tsk_thread_flag(child, TIF_PATCH_PENDING); + else + clear_tsk_thread_flag(child, TIF_PATCH_PENDING); + + child->patch_state = current->patch_state; } /* From f2ca4609d0c357c2b8b20f7f10c09450451d22cd Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Mon, 26 Sep 2022 15:20:08 +0000 Subject: [PATCH 0071/1477] ftrace: Properly unset FTRACE_HASH_FL_MOD commit 0ce0638edf5ec83343302b884fa208179580700a upstream. When executing following commands like what document said, but the log "#### all functions enabled ####" was not shown as expect: 1. Set a 'mod' filter: $ echo 'write*:mod:ext3' > /sys/kernel/tracing/set_ftrace_filter 2. 
Invert above filter: $ echo '!write*:mod:ext3' >> /sys/kernel/tracing/set_ftrace_filter 3. Read the file: $ cat /sys/kernel/tracing/set_ftrace_filter By some debugging, I found that flag FTRACE_HASH_FL_MOD was not unset after inversion like above step 2 and then result of ftrace_hash_empty() is incorrect. Link: https://lkml.kernel.org/r/20220926152008.2239274-1-zhengyejian1@huawei.com Cc: Cc: stable@vger.kernel.org Fixes: 8c08f0d5c6fb ("ftrace: Have cached module filters be an active filter") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d868df6f13c8..2165c9ac14bf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5662,8 +5662,12 @@ int ftrace_regex_release(struct inode *inode, struct file *file) if (filter_hash) { orig_hash = &iter->ops->func_hash->filter_hash; - if (iter->tr && !list_empty(&iter->tr->mod_trace)) - iter->hash->flags |= FTRACE_HASH_FL_MOD; + if (iter->tr) { + if (list_empty(&iter->tr->mod_trace)) + iter->hash->flags &= ~FTRACE_HASH_FL_MOD; + else + iter->hash->flags |= FTRACE_HASH_FL_MOD; + } } else orig_hash = &iter->ops->func_hash->notrace_hash; From 4a3bbd40e4525fba49b4786c4f5d151dd1826f2d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 27 Sep 2022 14:43:17 -0400 Subject: [PATCH 0072/1477] ring-buffer: Allow splice to read previous partially read pages commit fa8f4a89736b654125fb254b0db753ac68a5fced upstream. If a page is partially read, and then the splice system call is run against the ring buffer, it will always fail to read, no matter how much is in the ring buffer. That's because the code path for a partial read of the page will fail if the "full" flag is set. 
The splice system call wants full pages, so if the read of the ring buffer is not yet full, it should return zero, and the splice will block. But if a previous read was done, where the beginning has been consumed, it should still be given to the splice caller if the rest of the page has been written to. This caused the splice command to never consume data in this scenario, and let the ring buffer just fill up and lose events. Link: https://lkml.kernel.org/r/20220927144317.46be6b80@gandalf.local.home Cc: stable@vger.kernel.org Fixes: 8789a9e7df6bf ("ring-buffer: read page interface") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6deac666ba3e..feebbf05fb60 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5341,7 +5341,15 @@ int ring_buffer_read_page(struct trace_buffer *buffer, unsigned int pos = 0; unsigned int size; - if (full) + /* + * If a full page is expected, this can still be returned + * if there's been a previous partial read and the + * rest of the page can be read and the commit page is off + * the reader page. + */ + if (full && + (!read || (len < (commit - read)) || + cpu_buffer->reader_page == cpu_buffer->commit_page)) goto out_unlock; if (len > (commit - read)) From 6617e5132c442a6420c84e82d753134d24fb6eeb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 27 Sep 2022 19:15:24 -0400 Subject: [PATCH 0073/1477] ring-buffer: Have the shortest_full queue be the shortest not longest commit 3b19d614b61b93a131f463817e08219c9ce1fee3 upstream. The logic to know when the shortest waiters on the ring buffer should be woken up or not uses a less than instead of a greater than compare, which causes the shortest_full to actually be the longest. 
Link: https://lkml.kernel.org/r/20220927231823.718039222@goodmis.org Cc: stable@vger.kernel.org Cc: Ingo Molnar Cc: Andrew Morton Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index feebbf05fb60..70d2f1912355 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -888,7 +888,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) nr_pages = cpu_buffer->nr_pages; dirty = ring_buffer_nr_dirty_pages(buffer, cpu); if (!cpu_buffer->shortest_full || - cpu_buffer->shortest_full < full) + cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); if (!pagebusy && From 586f02c500b251992206d8a430c80293865ff622 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 27 Sep 2022 19:15:25 -0400 Subject: [PATCH 0074/1477] ring-buffer: Check pending waiters when doing wake ups as well commit ec0bbc5ec5664dcee344f79373852117dc672c86 upstream. The wake up waiters only checks the "wakeup_full" variable and not the "full_waiters_pending". The full_waiters_pending is set when a waiter is added to the wait queue. The wakeup_full is only set when an event is triggered, and it clears the full_waiters_pending to avoid multiple calls to irq_work_queue(). The irq_work callback really needs to check both wakeup_full as well as full_waiters_pending such that this code can be used to wake up waiters when a file is closed that represents the ring buffer and the waiters need to be woken up. 
Link: https://lkml.kernel.org/r/20220927231824.209460321@goodmis.org Cc: stable@vger.kernel.org Cc: Ingo Molnar Cc: Andrew Morton Fixes: 15693458c4bc0 ("tracing/ring-buffer: Move poll wake ups into ring buffer code") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 70d2f1912355..8c8f7d1553fb 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -794,8 +794,9 @@ static void rb_wake_up_waiters(struct irq_work *work) struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); wake_up_all(&rbwork->waiters); - if (rbwork->wakeup_full) { + if (rbwork->full_waiters_pending || rbwork->wakeup_full) { rbwork->wakeup_full = false; + rbwork->full_waiters_pending = false; wake_up_all(&rbwork->full_waiters); } } From 588f02f8b9d96c4a89db955394ec54e77b1d482f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 28 Sep 2022 13:39:38 -0400 Subject: [PATCH 0075/1477] ring-buffer: Add ring_buffer_wake_waiters() commit 7e9fbbb1b776d8d7969551565bc246f74ec53b27 upstream. On closing of a file that represents a ring buffer or flushing the file, there may be waiters on the ring buffer that needs to be woken up and exit the ring_buffer_wait() function. Add ring_buffer_wake_waiters() to wake up the waiters on the ring buffer and allow them to exit the wait loop. 
Link: https://lkml.kernel.org/r/20220928133938.28dc2c27@gandalf.local.home Cc: stable@vger.kernel.org Cc: Ingo Molnar Cc: Andrew Morton Fixes: 15693458c4bc0 ("tracing/ring-buffer: Move poll wake ups into ring buffer code") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- include/linux/ring_buffer.h | 2 +- kernel/trace/ring_buffer.c | 39 +++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 136ea0997e6d..c9237d30c29b 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -100,7 +100,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table); - +void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); #define RING_BUFFER_ALL_CPUS -1 diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8c8f7d1553fb..e9fd6cf9cb9f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -414,6 +414,7 @@ struct rb_irq_work { struct irq_work work; wait_queue_head_t waiters; wait_queue_head_t full_waiters; + long wait_index; bool waiters_pending; bool full_waiters_pending; bool wakeup_full; @@ -801,6 +802,37 @@ static void rb_wake_up_waiters(struct irq_work *work) } } +/** + * ring_buffer_wake_waiters - wake up any waiters on this ring buffer + * @buffer: The ring buffer to wake waiters on + * + * In the case of a file that represents a ring buffer is closing, + * it is prudent to wake up any waiters that are on this. + */ +void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + struct rb_irq_work *rbwork; + + if (cpu == RING_BUFFER_ALL_CPUS) { + + /* Wake up individual ones too. 
One level recursion */ + for_each_buffer_cpu(buffer, cpu) + ring_buffer_wake_waiters(buffer, cpu); + + rbwork = &buffer->irq_work; + } else { + cpu_buffer = buffer->buffers[cpu]; + rbwork = &cpu_buffer->irq_work; + } + + rbwork->wait_index++; + /* make sure the waiters see the new index */ + smp_wmb(); + + rb_wake_up_waiters(&rbwork->work); +} + /** * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on @@ -816,6 +848,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) struct ring_buffer_per_cpu *cpu_buffer; DEFINE_WAIT(wait); struct rb_irq_work *work; + long wait_index; int ret = 0; /* @@ -834,6 +867,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) work = &cpu_buffer->irq_work; } + wait_index = READ_ONCE(work->wait_index); while (true) { if (full) @@ -898,6 +932,11 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) } schedule(); + + /* Make sure to see the new wait index */ + smp_rmb(); + if (wait_index != work->wait_index) + break; } if (full) From 0cf6c09dafeeb6f3d92cc19ea9e024640448c42e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 29 Sep 2022 10:49:09 -0400 Subject: [PATCH 0076/1477] ring-buffer: Fix race between reset page and reading page commit a0fcaaed0c46cf9399d3a2d6e0c87ddb3df0e044 upstream. The ring buffer is broken up into sub buffers (currently of page size). Each sub buffer has a pointer to its "tail" (the last event written to the sub buffer). When a new event is requested, the tail is locally incremented to cover the size of the new event. This is done in a way that there is no need for locking. If the tail goes past the end of the sub buffer, the process of moving to the next sub buffer takes place. 
After setting the current sub buffer to the next one, the previous one that had the tail go past the end of the sub buffer needs to be reset back to the original tail location (before the new event was requested) and the rest of the sub buffer needs to be "padded". The race happens when a reader takes control of the sub buffer. As readers do a "swap" of sub buffers from the ring buffer to get exclusive access to the sub buffer, it replaces the "head" sub buffer with an empty sub buffer that goes back into the writable portion of the ring buffer. This swap can happen as soon as the writer moves to the next sub buffer and before it updates the last sub buffer with padding. Because the sub buffer can be released to the reader while the writer is still updating the padding, it is possible for the reader to see the event that goes past the end of the sub buffer. This can cause obvious issues. To fix this, add a few memory barriers so that the reader definitely sees the updates to the sub buffer, and also waits until the writer has put the "tail" of the sub buffer back to the last event that was written on it. To be paranoid, it will only spin for 1 second, otherwise it will warn and shut down the ring buffer code. 1 second should be enough as the writer does have preemption disabled. If the writer doesn't move within 1 second (with preemption disabled) something is horribly wrong. No interrupt should last 1 second! 
Link: https://lore.kernel.org/all/20220830120854.7545-1-jiazi.li@transsion.com/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=216369 Link: https://lkml.kernel.org/r/20220929104909.0650a36c@gandalf.local.home Cc: Ingo Molnar Cc: Andrew Morton Cc: stable@vger.kernel.org Fixes: c7b0930857e22 ("ring-buffer: prevent adding write in discarded area") Reported-by: Jiazi.Li Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e9fd6cf9cb9f..a12e27815555 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2531,6 +2531,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* Mark the rest of the page with padding */ rb_event_set_padding(event); + /* Make sure the padding is visible before the write update */ + smp_wmb(); + /* Set the write back to the previous setting */ local_sub(length, &tail_page->write); return; @@ -2542,6 +2545,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; + /* Make sure the padding is visible before the tail_page->write update */ + smp_wmb(); + /* Set write to end of buffer */ length = (tail + length) - BUF_PAGE_SIZE; local_sub(length, &tail_page->write); @@ -4356,6 +4362,33 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); + /* + * The writer has preempt disable, wait for it. 
But not forever + * Although, 1 second is pretty much "forever" + */ +#define USECS_WAIT 1000000 + for (nr_loops = 0; nr_loops < USECS_WAIT; nr_loops++) { + /* If the write is past the end of page, a writer is still updating it */ + if (likely(!reader || rb_page_write(reader) <= BUF_PAGE_SIZE)) + break; + + udelay(1); + + /* Get the latest version of the reader write value */ + smp_rmb(); + } + + /* The writer is not moving forward? Something is wrong */ + if (RB_WARN_ON(cpu_buffer, nr_loops == USECS_WAIT)) + reader = NULL; + + /* + * Make sure we see any padding after the write update + * (see rb_reset_tail()) + */ + smp_rmb(); + + return reader; } From fc08f8438172a036308e0b9a52b67e6f33870100 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 22 Sep 2022 10:56:22 -0400 Subject: [PATCH 0077/1477] tracing: Disable interrupt or preemption before acquiring arch_spinlock_t commit c0a581d7126c0bbc96163276f585fd7b4e4d8d0e upstream. It was found that some tracing functions in kernel/trace/trace.c acquire an arch_spinlock_t with preemption and irqs enabled. An example is the tracing_saved_cmdlines_size_read() function which intermittently causes a "BUG: using smp_processor_id() in preemptible" warning when the LTP read_all_proc test is run. That can be problematic in case preemption happens after acquiring the lock. Add the necessary preemption or interrupt disabling code in the appropriate places before acquiring an arch_spinlock_t. The convention here is to disable preemption for trace_cmdline_lock and interrupts for max_lock. 
Link: https://lkml.kernel.org/r/20220922145622.1744826-1-longman@redhat.com Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Will Deacon Cc: Boqun Feng Cc: stable@vger.kernel.org Fixes: a35873a0993b ("tracing: Add conditional snapshot") Fixes: 939c7a4f04fc ("tracing: Introduce saved_cmdlines_size file") Suggested-by: Steven Rostedt Signed-off-by: Waiman Long Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 50200898410d..a5245362ce7a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1197,12 +1197,14 @@ void *tracing_cond_snapshot_data(struct trace_array *tr) { void *cond_data = NULL; + local_irq_disable(); arch_spin_lock(&tr->max_lock); if (tr->cond_snapshot) cond_data = tr->cond_snapshot->cond_data; arch_spin_unlock(&tr->max_lock); + local_irq_enable(); return cond_data; } @@ -1338,9 +1340,11 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, goto fail_unlock; } + local_irq_disable(); arch_spin_lock(&tr->max_lock); tr->cond_snapshot = cond_snapshot; arch_spin_unlock(&tr->max_lock); + local_irq_enable(); mutex_unlock(&trace_types_lock); @@ -1367,6 +1371,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr) { int ret = 0; + local_irq_disable(); arch_spin_lock(&tr->max_lock); if (!tr->cond_snapshot) @@ -1377,6 +1382,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr) } arch_spin_unlock(&tr->max_lock); + local_irq_enable(); return ret; } @@ -2198,6 +2204,11 @@ static size_t tgid_map_max; #define SAVED_CMDLINES_DEFAULT 128 #define NO_CMDLINE_MAP UINT_MAX +/* + * Preemption must be disabled before acquiring trace_cmdline_lock. + * The various trace_arrays' max_lock must be acquired in a context + * where interrupt is disabled. 
+ */ static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; struct saved_cmdlines_buffer { unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; @@ -2410,7 +2421,11 @@ static int trace_save_cmdline(struct task_struct *tsk) * the lock, but we also don't want to spin * nor do we want to disable interrupts, * so if we miss here, then better luck next time. + * + * This is called within the scheduler and wake up, so interrupts + * had better been disabled and run queue lock been held. */ + lockdep_assert_preemption_disabled(); if (!arch_spin_trylock(&trace_cmdline_lock)) return 0; @@ -5470,9 +5485,11 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, char buf[64]; int r; + preempt_disable(); arch_spin_lock(&trace_cmdline_lock); r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num); arch_spin_unlock(&trace_cmdline_lock); + preempt_enable(); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -5497,10 +5514,12 @@ static int tracing_resize_saved_cmdlines(unsigned int val) return -ENOMEM; } + preempt_disable(); arch_spin_lock(&trace_cmdline_lock); savedcmd_temp = savedcmd; savedcmd = s; arch_spin_unlock(&trace_cmdline_lock); + preempt_enable(); free_saved_cmdlines_buffer(savedcmd_temp); return 0; @@ -5953,10 +5972,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) #ifdef CONFIG_TRACER_SNAPSHOT if (t->use_max_tr) { + local_irq_disable(); arch_spin_lock(&tr->max_lock); if (tr->cond_snapshot) ret = -EBUSY; arch_spin_unlock(&tr->max_lock); + local_irq_enable(); if (ret) goto out; } @@ -7030,10 +7051,12 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, goto out; } + local_irq_disable(); arch_spin_lock(&tr->max_lock); if (tr->cond_snapshot) ret = -EBUSY; arch_spin_unlock(&tr->max_lock); + local_irq_enable(); if (ret) goto out; From 68158654b583bedafc50523a540160be115227d7 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Sep 2022 09:33:50 -0500 Subject: [PATCH 
0078/1477] thunderbolt: Explicitly enable lane adapter hotplug events at startup commit 5d2569cb4a65c373896ec0217febdf88739ed295 upstream. Software that has run before the USB4 CM in Linux runs may have disabled hotplug events for a given lane adapter. Other CMs such as that one distributed with Windows 11 will enable hotplug events. Do the same thing in the Linux CM which fixes hotplug events on "AMD Pink Sardine". Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 24 ++++++++++++++++++++++++ drivers/thunderbolt/tb.h | 1 + drivers/thunderbolt/tb_regs.h | 1 + drivers/thunderbolt/usb4.c | 20 ++++++++++++++++++++ 4 files changed, 46 insertions(+) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 65f99d744654..e881b72833dc 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -2413,6 +2413,26 @@ void tb_switch_unconfigure_link(struct tb_switch *sw) tb_lc_unconfigure_port(down); } +static int tb_switch_port_hotplug_enable(struct tb_switch *sw) +{ + struct tb_port *port; + + if (tb_switch_is_icm(sw)) + return 0; + + tb_switch_for_each_port(sw, port) { + int res; + + if (!port->cap_usb4) + continue; + + res = usb4_port_hotplug_enable(port); + if (res) + return res; + } + return 0; +} + /** * tb_switch_add() - Add a switch to the domain * @sw: Switch to add @@ -2480,6 +2500,10 @@ int tb_switch_add(struct tb_switch *sw) return ret; } + ret = tb_switch_port_hotplug_enable(sw); + if (ret) + return ret; + ret = device_add(&sw->dev); if (ret) { dev_err(&sw->dev, "failed to add device: %d\n", ret); diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index 8ea360b0ff77..266f3bf8ff5c 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -979,6 +979,7 @@ struct tb_port *usb4_switch_map_usb3_down(struct tb_switch *sw, const struct tb_port *port); int usb4_port_unlock(struct tb_port *port); 
+int usb4_port_hotplug_enable(struct tb_port *port); int usb4_port_configure(struct tb_port *port); void usb4_port_unconfigure(struct tb_port *port); int usb4_port_configure_xdomain(struct tb_port *port); diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h index e7d9529822fa..26868e2f9d0b 100644 --- a/drivers/thunderbolt/tb_regs.h +++ b/drivers/thunderbolt/tb_regs.h @@ -285,6 +285,7 @@ struct tb_regs_port_header { #define ADP_CS_5 0x05 #define ADP_CS_5_LCA_MASK GENMASK(28, 22) #define ADP_CS_5_LCA_SHIFT 22 +#define ADP_CS_5_DHP BIT(31) /* TMU adapter registers */ #define TMU_ADP_CS_3 0x03 diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index c05ec6fad77f..0b3a77ade04d 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -854,6 +854,26 @@ int usb4_port_unlock(struct tb_port *port) return tb_port_write(port, &val, TB_CFG_PORT, ADP_CS_4, 1); } +/** + * usb4_port_hotplug_enable() - Enables hotplug for a port + * @port: USB4 port to operate on + * + * Enables hot plug events on a given port. This is only intended + * to be used on lane, DP-IN, and DP-OUT adapters. + */ +int usb4_port_hotplug_enable(struct tb_port *port) +{ + int ret; + u32 val; + + ret = tb_port_read(port, &val, TB_CFG_PORT, ADP_CS_5, 1); + if (ret) + return ret; + + val &= ~ADP_CS_5_DHP; + return tb_port_write(port, &val, TB_CFG_PORT, ADP_CS_5, 1); +} + static int usb4_port_set_configured(struct tb_port *port, bool configured) { int ret; From dbdd3b1448e5944b273c178823360f9fe890ae06 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 15 Sep 2022 19:00:24 +0200 Subject: [PATCH 0079/1477] efi: libstub: drop pointless get_memory_map() call commit d80ca810f096ff66f451e7a3ed2f0cd9ef1ff519 upstream. Currently, the non-x86 stub code calls get_memory_map() redundantly, given that the data it returns is never used anywhere. So drop the call. 
Cc: # v4.14+ Fixes: 24d7c494ce46 ("efi/arm-stub: Round up FDT allocation to mapping size") Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/fdt.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 368cd60000ee..d48b0de05b62 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -281,14 +281,6 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, goto fail; } - /* - * Now that we have done our final memory allocation (and free) - * we can get the memory map key needed for exit_boot_services(). - */ - status = efi_get_memory_map(&map); - if (status != EFI_SUCCESS) - goto fail_free_new_fdt; - status = update_fdt((void *)fdt_addr, fdt_size, (void *)*new_fdt_addr, MAX_FDT_SIZE, cmdline_ptr, initrd_addr, initrd_size); From bda8120e5b100efd0d820b0a2aa1d2a0469bea51 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 18 Aug 2022 22:33:07 +0200 Subject: [PATCH 0080/1477] media: cedrus: Set the platform driver data earlier commit 708938f8495147fe2e77a9a3e1015d8e6899323e upstream. The cedrus_hw_resume() crashes with NULL deference on driver probe if runtime PM is disabled because it uses platform data that hasn't been set up yet. Fix this by setting the platform data earlier during probe. 
Cc: stable@vger.kernel.org Fixes: 50e761516f2b (media: platform: Add Cedrus VPU decoder driver) Signed-off-by: Dmitry Osipenko Signed-off-by: Nicolas Dufresne Reviewed-by: Samuel Holland Acked-by: Paul Kocialkowski Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/sunxi/cedrus/cedrus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c index 1dd833757c4e..28de90edf4cc 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c @@ -399,6 +399,8 @@ static int cedrus_probe(struct platform_device *pdev) if (!dev) return -ENOMEM; + platform_set_drvdata(pdev, dev); + dev->vfd = cedrus_video_device; dev->dev = &pdev->dev; dev->pdev = pdev; @@ -469,8 +471,6 @@ static int cedrus_probe(struct platform_device *pdev) goto err_m2m_mc; } - platform_set_drvdata(pdev, dev); - return 0; err_m2m_mc: From 085ca1d33b198048b26d8c5644809b6cdc89d651 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 22 Aug 2022 00:06:47 +0200 Subject: [PATCH 0081/1477] KVM: x86/emulator: Fix handing of POP SS to correctly set interruptibility commit 6aa5c47c351b22c21205c87977c84809cd015fcf upstream. The emulator checks the wrong variable while setting the CPU interruptibility state, the target segment is embedded in the instruction opcode, not the ModR/M register. Fix the condition. 
Signed-off-by: Michal Luczaj Fixes: a5457e7bcf9a ("KVM: emulate: POP SS triggers a MOV SS shadow too") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20220821215900.1419215-1-mhal@rbox.co Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2aa41d682bb2..52a881d24070 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2039,7 +2039,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - if (ctxt->modrm_reg == VCPU_SREG_SS) + if (seg == VCPU_SREG_SS) ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; if (ctxt->op_bytes > 2) rsp_increment(ctxt, ctxt->op_bytes - 2); From 83fe0b009bd035572438e13c261c7dfc57701db2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 30 Aug 2022 23:15:48 +0000 Subject: [PATCH 0082/1477] KVM: nVMX: Unconditionally purge queued/injected events on nested "exit" commit d953540430c5af57f5de97ea9e36253908204027 upstream. Drop pending exceptions and events queued for re-injection when leaving nested guest mode, even if the "exit" is due to VM-Fail, SMI, or forced by host userspace. Failure to purge events could result in an event belonging to L2 being injected into L1. This _should_ never happen for VM-Fail as all events should be blocked by nested_run_pending, but it's possible if KVM, not the L1 hypervisor, is the source of VM-Fail when running vmcs02. SMI is a nop (barring unknown bugs) as recognition of SMI and thus entry to SMM is blocked by pending exceptions and re-injected events. Forced exit is definitely buggy, but has likely gone unnoticed because userspace probably follows the forced exit with KVM_SET_VCPU_EVENTS (or some other ioctl() that purges the queue). 
Fixes: 4f350c6dbcb9 ("kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure properly") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Reviewed-by: Jim Mattson Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20220830231614.3580124-2-seanjc@google.com Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/nested.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 6c4277e99d58..5c3e394c80de 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4183,14 +4183,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL); } - - /* - * Drop what we picked up for L2 via vmx_complete_interrupts. It is - * preserved above and would only end up incorrectly in L1. - */ - vcpu->arch.nmi_injected = false; - kvm_clear_exception_queue(vcpu); - kvm_clear_interrupt_queue(vcpu); } /* @@ -4530,6 +4522,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, WARN_ON_ONCE(nested_early_check); } + /* + * Drop events/exceptions that were queued for re-injection to L2 + * (picked up via vmx_complete_interrupts()), as well as exceptions + * that were pending for L2. Note, this must NOT be hoisted above + * prepare_vmcs12(), events/exceptions queued for re-injection need to + * be captured in vmcs12 (see vmcs12_save_pending_event()). + */ + vcpu->arch.nmi_injected = false; + kvm_clear_exception_queue(vcpu); + kvm_clear_interrupt_queue(vcpu); + vmx_switch_vmcs(vcpu, &vmx->vmcs01); /* Update any VMCS fields that might have changed while L2 ran */ From ceeb8d4a43acfa9f6d09938d598721c209eee969 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 30 Aug 2022 23:15:49 +0000 Subject: [PATCH 0083/1477] KVM: VMX: Drop bits 31:16 when shoving exception error code into VMCS commit eba9799b5a6efe2993cf92529608e4aa8163d73b upstream. 
Deliberately truncate the exception error code when shoving it into the VMCS (VM-Entry field for vmcs01 and vmcs02, VM-Exit field for vmcs12). Intel CPUs are incapable of handling 32-bit error codes and will never generate an error code with bits 31:16, but userspace can provide an arbitrary error code via KVM_SET_VCPU_EVENTS. Failure to drop the bits on exception injection results in failed VM-Entry, as VMX disallows setting bits 31:16. Setting the bits on VM-Exit would at best confuse L1, and at worse induce a nested VM-Entry failure, e.g. if L1 decided to reinject the exception back into L2. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Reviewed-by: Jim Mattson Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20220830231614.3580124-3-seanjc@google.com Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/nested.c | 11 ++++++++++- arch/x86/kvm/vmx/vmx.c | 12 +++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 5c3e394c80de..7f15e2b2a0d6 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3776,7 +3776,16 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, u32 intr_info = nr | INTR_INFO_VALID_MASK; if (vcpu->arch.exception.has_error_code) { - vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; + /* + * Intel CPUs do not generate error codes with bits 31:16 set, + * and more importantly VMX disallows setting bits 31:16 in the + * injected error code for VM-Entry. Drop the bits to mimic + * hardware and avoid inducing failure on nested VM-Entry if L1 + * chooses to inject the exception back to L2. AMD CPUs _do_ + * generate "full" 32-bit error codes, so KVM allows userspace + * to inject exception error codes with bits 31:16 set. 
+ */ + vmcs12->vm_exit_intr_error_code = (u16)vcpu->arch.exception.error_code; intr_info |= INTR_INFO_DELIVER_CODE_MASK; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b33d0f283d4f..af6742d11ca1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1737,7 +1737,17 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) kvm_deliver_exception_payload(vcpu); if (has_error_code) { - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); + /* + * Despite the error code being architecturally defined as 32 + * bits, and the VMCS field being 32 bits, Intel CPUs and thus + * VMX don't actually supporting setting bits 31:16. Hardware + * will (should) never provide a bogus error code, but AMD CPUs + * do generate error codes with bits 31:16 set, and so KVM's + * ABI lets userspace shove in arbitrary 32-bit values. Drop + * the upper bits to avoid VM-Fail, losing information that + * does't really exist is preferable to killing the VM. + */ + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)error_code); intr_info |= INTR_INFO_DELIVER_CODE_MASK; } From d0febad83e29d85bb66e4f5cac0115b022403338 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 2 Sep 2022 16:37:15 +0200 Subject: [PATCH 0084/1477] staging: greybus: audio_helper: remove unused and wrong debugfs usage commit d517cdeb904ddc0cbebcc959d43596426cac40b0 upstream. In the greybus audio_helper code, the debugfs file for the dapm has the potential to be removed and memory will be leaked. There is also the very real potential for this code to remove ALL debugfs entries from the system, and it seems like this is what will really happen if this code ever runs. This all is very wrong as the greybus audio driver did not create this debugfs file, the sound core did and controls the lifespan of it. So remove all of the debugfs logic from the audio_helper code as there's no way it could be correct. 
If this really is needed, it can come back with a fixup for the incorrect usage of the debugfs_lookup() call which is what caused this to be noticed at all. Cc: Johan Hovold Cc: Alex Elder Cc: Greg Kroah-Hartman Cc: stable Link: https://lore.kernel.org/r/20220902143715.320500-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/audio_helper.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/staging/greybus/audio_helper.c b/drivers/staging/greybus/audio_helper.c index a9576f92efaa..08443f4aa045 100644 --- a/drivers/staging/greybus/audio_helper.c +++ b/drivers/staging/greybus/audio_helper.c @@ -3,7 +3,6 @@ * Greybus Audio Sound SoC helper APIs */ -#include #include #include #include @@ -116,10 +115,6 @@ int gbaudio_dapm_free_controls(struct snd_soc_dapm_context *dapm, { int i; struct snd_soc_dapm_widget *w, *next_w; -#ifdef CONFIG_DEBUG_FS - struct dentry *parent = dapm->debugfs_dapm; - struct dentry *debugfs_w = NULL; -#endif mutex_lock(&dapm->card->dapm_mutex); for (i = 0; i < num; i++) { @@ -139,12 +134,6 @@ int gbaudio_dapm_free_controls(struct snd_soc_dapm_context *dapm, continue; } widget++; -#ifdef CONFIG_DEBUG_FS - if (!parent) - debugfs_w = debugfs_lookup(w->name, parent); - debugfs_remove(debugfs_w); - debugfs_w = NULL; -#endif gbaudio_dapm_free_widget(w); } mutex_unlock(&dapm->card->dapm_mutex); From 57f1a89a8e4ea0ffd3ab7321371e226b2dbbd0a5 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 16 Aug 2022 14:04:36 -0400 Subject: [PATCH 0085/1477] drm/nouveau/kms/nv140-: Disable interlacing commit 8ba9249396bef37cb68be9e8dee7847f1737db9d upstream. As it turns out: while Nvidia does actually have interlacing knobs on their GPU still pretty much no current GPUs since Volta actually support it. 
Trying interlacing on these GPUs will result in NVDisplay being quite unhappy like so: nouveau 0000:1f:00.0: disp: chid 0 stat 00004802 reason 4 [INVALID_ARG] mthd 2008 data 00000001 code 00080000 nouveau 0000:1f:00.0: disp: chid 0 stat 10005080 reason 5 [INVALID_STATE] mthd 0200 data 00000001 code 00000001 So let's fix this by following the same behavior Nvidia's driver does and disable interlacing entirely. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Reviewed-by: Karol Herbst Link: https://patchwork.freedesktop.org/patch/msgid/20220816180436.156310-1-lyude@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_connector.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 4c992fd5bd68..9542fc63e796 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -500,7 +500,8 @@ nouveau_connector_set_encoder(struct drm_connector *connector, connector->interlace_allowed = nv_encoder->caps.dp_interlace; else - connector->interlace_allowed = true; + connector->interlace_allowed = + drm->client.device.info.family < NV_DEVICE_INFO_V0_VOLTA; connector->doublescan_allowed = true; } else if (nv_encoder->dcb->type == DCB_OUTPUT_LVDS || From 5d6093c49c098d86c7b136aba9922df44aeb6944 Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Tue, 5 Jul 2022 21:25:46 +0800 Subject: [PATCH 0086/1477] drm/nouveau: fix a use-after-free in nouveau_gem_prime_import_sg_table() commit 540dfd188ea2940582841c1c220bd035a7db0e51 upstream. nouveau_bo_init() is backed by ttm_bo_init() and ferries its return code back to the caller. On failures, ttm will call nouveau_bo_del_ttm() and free the memory. Thus, when nouveau_bo_init() returns an error, the gem object has already been released. Then the call to nouveau_bo_ref() will use the freed "nvbo->bo" and lead to a use-after-free bug.
We should delete the call to nouveau_bo_ref() to avoid the use-after-free. Signed-off-by: Jianglei Nie Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Fixes: 019cbd4a4feb ("drm/nouveau: Initialize GEM object before TTM object") Cc: Thierry Reding Cc: # v5.4+ Link: https://patchwork.freedesktop.org/patch/msgid/20220705132546.2247677-1-niejianglei2021@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_prime.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index 5f5b87f99546..f08bda533bd9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -89,7 +89,6 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev, ret = nouveau_bo_init(nvbo, size, align, NOUVEAU_GEM_DOMAIN_GART, sg, robj); if (ret) { - nouveau_bo_ref(NULL, &nvbo); obj = ERR_PTR(ret); goto unlock; } From fd37286f392abd0f7c1e84a1d70ce828ca183ec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 3 Oct 2022 14:15:39 +0300 Subject: [PATCH 0087/1477] drm/i915: Fix watermark calculations for gen12+ RC CCS modifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c56453a00f19ccddee302f5f9fe96b80e0b47fd3 upstream. Take the gen12+ RC CCS modifier into account when calculating the watermarks. Otherwise we'll calculate the watermarks thinking this Y-tiled modifier is linear. The rc_surface part is actually a nop since that is not used for any glk+ platform. v2: Split RC CCS vs.
MC CCS to separate patches Cc: stable@vger.kernel.org Fixes: b3e57bccd68a ("drm/i915/tgl: Gen-12 render decompression") Reviewed-by: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221003111544.8007-2-ville.syrjala@linux.intel.com (cherry picked from commit a89a96a586114f67598c6391c75678b4dba5c2da) Signed-off-by: Tvrtko Ursulin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_pm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2f2dc029668b..d59212e13a1d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5145,10 +5145,12 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, wp->y_tiled = modifier == I915_FORMAT_MOD_Y_TILED || modifier == I915_FORMAT_MOD_Yf_TILED || modifier == I915_FORMAT_MOD_Y_TILED_CCS || - modifier == I915_FORMAT_MOD_Yf_TILED_CCS; + modifier == I915_FORMAT_MOD_Yf_TILED_CCS || + modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS; wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED; wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS || - modifier == I915_FORMAT_MOD_Yf_TILED_CCS; + modifier == I915_FORMAT_MOD_Yf_TILED_CCS || + modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS; wp->is_planar = intel_format_info_is_yuv_semiplanar(format, modifier); wp->width = width; From abd13b21004dfbd8efe2871913f36acd1ae0332d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 3 Oct 2022 14:15:40 +0300 Subject: [PATCH 0088/1477] drm/i915: Fix watermark calculations for gen12+ MC CCS modifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 484b2b9281000274ef7c5cb0a9ebc5da6f5c281c upstream. Take the gen12+ MC CCS modifier into account when calculating the watermarks. Otherwise we'll calculate the watermarks thinking this Y-tiled modifier is linear.
The rc_surface part is actually a nop since that is not used for any glk+ platform. v2: Split RC CCS vs. MC CCS to separate patches Cc: stable@vger.kernel.org Fixes: 2dfbf9d2873a ("drm/i915/tgl: Gen-12 display can decompress surfaces compressed by the media engine") Reviewed-by: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221003111544.8007-3-ville.syrjala@linux.intel.com (cherry picked from commit 91c9651425fe955b1387f3637607dda005f3f710) Signed-off-by: Tvrtko Ursulin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_pm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d59212e13a1d..1b5e8d3e45a9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5146,11 +5146,13 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, modifier == I915_FORMAT_MOD_Yf_TILED || modifier == I915_FORMAT_MOD_Y_TILED_CCS || modifier == I915_FORMAT_MOD_Yf_TILED_CCS || - modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS; + modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS || + modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS; wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED; wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS || modifier == I915_FORMAT_MOD_Yf_TILED_CCS || - modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS; + modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS || + modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS; wp->is_planar = intel_format_info_is_yuv_semiplanar(format, modifier); wp->width = width; From d11e09953cc00c9e8a263ed05b92f65dad7ab748 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 14 Oct 2022 18:50:20 -0500 Subject: [PATCH 0089/1477] smb3: must initialize two ACL struct fields to zero commit f09bd695af3b8ab46fc24e5d6954a24104c38387 upstream. Coverity spotted that we were not initializing Sbz1 and Sbz2 to zero in create_sd_buf.
Addresses-Coverity: 1513848 ("Uninitialized scalar variable") Cc: Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 4232dc80deeb..0c4a2474e75b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2294,7 +2294,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) unsigned int acelen, acl_size, ace_count; unsigned int owner_offset = 0; unsigned int group_offset = 0; - struct smb3_acl acl; + struct smb3_acl acl = {}; *len = roundup(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 4), 8); @@ -2367,6 +2367,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */ acl.AclSize = cpu_to_le16(acl_size); acl.AceCount = cpu_to_le16(ace_count); + /* acl.Sbz1 and Sbz2 MBZ so are not set here, but initialized above */ memcpy(aclptr, &acl, sizeof(struct smb3_acl)); buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd); From 28d9b39733078fe3a7e74a9186872ed5ab496c48 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 20 Sep 2022 19:12:52 +0200 Subject: [PATCH 0090/1477] selinux: use "grep -E" instead of "egrep" commit c969bb8dbaf2f3628927eae73e7c579a74cf1b6e upstream. The latest version of grep claims that egrep is now obsolete so the build now contains warnings that look like: egrep: warning: egrep is obsolescent; using grep -E fix this by using "grep -E" instead. 
Cc: Paul Moore Cc: Stephen Smalley Cc: Eric Paris Cc: selinux@vger.kernel.org Signed-off-by: Greg Kroah-Hartman [PM: tweak to remove vdso reference, cleanup subj line] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- scripts/selinux/install_policy.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/selinux/install_policy.sh b/scripts/selinux/install_policy.sh index 2dccf141241d..20af56ce245c 100755 --- a/scripts/selinux/install_policy.sh +++ b/scripts/selinux/install_policy.sh @@ -78,7 +78,7 @@ cd /etc/selinux/dummy/contexts/files $SF -F file_contexts / mounts=`cat /proc/$$/mounts | \ - egrep "ext[234]|jfs|xfs|reiserfs|jffs2|gfs2|btrfs|f2fs|ocfs2" | \ + grep -E "ext[234]|jfs|xfs|reiserfs|jffs2|gfs2|btrfs|f2fs|ocfs2" | \ awk '{ print $2 '}` $SF -F file_contexts $mounts From 6e4be747f15fa32a6382ca2d9392fb9ba2ce051f Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 8 Jul 2022 11:34:51 +0200 Subject: [PATCH 0091/1477] userfaultfd: open userfaultfds with O_RDONLY [ Upstream commit abec3d015fdfb7c63105c7e1c956188bf381aa55 ] Since userfaultfd doesn't implement a write operation, it is more appropriate to open it read-only. When userfaultfds are opened read-write like it is now, and such fd is passed from one process to another, SELinux will check both read and write permissions for the target process, even though it can't actually do any write operation on the fd later. 
Inspired by the following bug report, which has hit the SELinux scenario described above: https://bugzilla.redhat.com/show_bug.cgi?id=1974559 Reported-by: Robert O'Callahan Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization") Signed-off-by: Ondrej Mosnacek Acked-by: Peter Xu Acked-by: Christian Brauner (Microsoft) Signed-off-by: Paul Moore Signed-off-by: Sasha Levin --- fs/userfaultfd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index aef0da5d6f63..a3074a9d71a6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -974,7 +974,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *ctx, int fd; fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, new, - O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS)); + O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS)); if (fd < 0) return fd; @@ -1987,7 +1987,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) mmgrab(ctx->mm); fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, ctx, - O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); + O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS)); if (fd < 0) { mmdrop(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); From 35984456983ba1cd359ccb4a71c8707075f98728 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Sep 2022 16:40:44 -0700 Subject: [PATCH 0092/1477] sh: machvec: Use char[] for section boundaries [ Upstream commit c5783af354688b24abd359f7086c282ec74de993 ] As done for other sections, define the extern as a character array, which relaxes many of the compiler-time object size checks, which would otherwise assume it's a single long. 
Solves the following build error: arch/sh/kernel/machvec.c: error: array subscript 'struct sh_machine_vector[0]' is partly outside array bounds of 'long int[1]' [-Werror=array-bounds]: => 105:33 Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/lkml/alpine.DEB.2.22.394.2209050944290.964530@ramsan.of.borg/ Fixes: 9655ad03af2d ("sh: Fixup machvec support.") Reviewed-by: Geert Uytterhoeven Reviewed-by: Gustavo A. R. Silva Acked-by: Rich Felker Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- arch/sh/include/asm/sections.h | 2 +- arch/sh/kernel/machvec.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/sh/include/asm/sections.h b/arch/sh/include/asm/sections.h index 8edb824049b9..0cb0ca149ac3 100644 --- a/arch/sh/include/asm/sections.h +++ b/arch/sh/include/asm/sections.h @@ -4,7 +4,7 @@ #include -extern long __machvec_start, __machvec_end; +extern char __machvec_start[], __machvec_end[]; extern char __uncached_start, __uncached_end; extern char __start_eh_frame[], __stop_eh_frame[]; diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c index d606679a211e..57efaf5b82ae 100644 --- a/arch/sh/kernel/machvec.c +++ b/arch/sh/kernel/machvec.c @@ -20,8 +20,8 @@ #define MV_NAME_SIZE 32 #define for_each_mv(mv) \ - for ((mv) = (struct sh_machine_vector *)&__machvec_start; \ - (mv) && (unsigned long)(mv) < (unsigned long)&__machvec_end; \ + for ((mv) = (struct sh_machine_vector *)__machvec_start; \ + (mv) && (unsigned long)(mv) < (unsigned long)__machvec_end; \ (mv)++) static struct sh_machine_vector * __init get_mv_byname(const char *name) @@ -87,8 +87,8 @@ void __init sh_mv_setup(void) if (!machvec_selected) { unsigned long machvec_size; - machvec_size = ((unsigned long)&__machvec_end - - (unsigned long)&__machvec_start); + machvec_size = ((unsigned long)__machvec_end - + (unsigned long)__machvec_start); /* * Sanity check for machvec section alignment. 
Ensure @@ -102,7 +102,7 @@ void __init sh_mv_setup(void) * vector (usually the only one) from .machvec.init. */ if (machvec_size >= sizeof(struct sh_machine_vector)) - sh_mv = *(struct sh_machine_vector *)&__machvec_start; + sh_mv = *(struct sh_machine_vector *)__machvec_start; } pr_notice("Booting machvec: %s\n", get_system_type()); From 0c667858c0266f46a33327259169657bc54b7334 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 23 Apr 2022 15:24:03 +0200 Subject: [PATCH 0093/1477] MIPS: SGI-IP27: Free some unused memory [ Upstream commit 33d7085682b4aa212ebfadbc21da81dfefaaac16 ] platform_device_add_data() duplicates the memory it is passed. So we can free some memory to save a few bytes that would remain unused otherwise. Signed-off-by: Christophe JAILLET Signed-off-by: Thomas Bogendoerfer Stable-dep-of: 11bec9cba4de ("MIPS: SGI-IP27: Fix platform-device leak in bridge_platform_create()") Signed-off-by: Sasha Levin --- arch/mips/sgi-ip27/ip27-xtalk.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c index 000ede156bdc..e762886d1dda 100644 --- a/arch/mips/sgi-ip27/ip27-xtalk.c +++ b/arch/mips/sgi-ip27/ip27-xtalk.c @@ -53,6 +53,8 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) } platform_device_add_resources(pdev, &w1_res, 1); platform_device_add_data(pdev, wd, sizeof(*wd)); + /* platform_device_add_data() duplicates the data */ + kfree(wd); platform_device_add(pdev); bd = kzalloc(sizeof(*bd), GFP_KERNEL); @@ -83,6 +85,8 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) bd->io_offset = offset; platform_device_add_data(pdev, bd, sizeof(*bd)); + /* platform_device_add_data() duplicates the data */ + kfree(bd); platform_device_add(pdev); pr_info("xtalk:n%d/%x bridge widget\n", nasid, widget); return; From da2aecef866b476438d02c662507a0e4e818da9d Mon Sep 17 00:00:00 2001 From: Lin Yujun Date: Wed, 14 Sep 2022 11:29:17 +0800 
Subject: [PATCH 0094/1477] MIPS: SGI-IP27: Fix platform-device leak in bridge_platform_create() [ Upstream commit 11bec9cba4de06b3c0e9e4041453c2caaa1cbec1 ] In error case in bridge_platform_create after calling platform_device_add()/platform_device_add_data()/ platform_device_add_resources(), release the failed 'pdev' or it will be leak, call platform_device_put() to fix this problem. Besides, 'pdev' is divided into 'pdev_wd' and 'pdev_bd', use platform_device_unregister() to release sgi_w1 resources when xtalk-bridge registration fails. Fixes: 5dc76a96e95a ("MIPS: PCI: use information from 1-wire PROM for IOC3 detection") Signed-off-by: Lin Yujun Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/sgi-ip27/ip27-xtalk.c | 70 +++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c index e762886d1dda..5143d1cf8984 100644 --- a/arch/mips/sgi-ip27/ip27-xtalk.c +++ b/arch/mips/sgi-ip27/ip27-xtalk.c @@ -27,15 +27,18 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) { struct xtalk_bridge_platform_data *bd; struct sgi_w1_platform_data *wd; - struct platform_device *pdev; + struct platform_device *pdev_wd; + struct platform_device *pdev_bd; struct resource w1_res; unsigned long offset; offset = NODE_OFFSET(nasid); wd = kzalloc(sizeof(*wd), GFP_KERNEL); - if (!wd) - goto no_mem; + if (!wd) { + pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget); + return; + } snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx", offset + (widget << SWIN_SIZE_BITS)); @@ -46,24 +49,35 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) w1_res.end = w1_res.start + 3; w1_res.flags = IORESOURCE_MEM; - pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO); - if (!pdev) { - kfree(wd); - goto no_mem; + pdev_wd = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO); + if (!pdev_wd) { + 
pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget); + goto err_kfree_wd; + } + if (platform_device_add_resources(pdev_wd, &w1_res, 1)) { + pr_warn("xtalk:n%d/%x bridge failed to add platform resources.\n", nasid, widget); + goto err_put_pdev_wd; + } + if (platform_device_add_data(pdev_wd, wd, sizeof(*wd))) { + pr_warn("xtalk:n%d/%x bridge failed to add platform data.\n", nasid, widget); + goto err_put_pdev_wd; + } + if (platform_device_add(pdev_wd)) { + pr_warn("xtalk:n%d/%x bridge failed to add platform device.\n", nasid, widget); + goto err_put_pdev_wd; } - platform_device_add_resources(pdev, &w1_res, 1); - platform_device_add_data(pdev, wd, sizeof(*wd)); /* platform_device_add_data() duplicates the data */ kfree(wd); - platform_device_add(pdev); bd = kzalloc(sizeof(*bd), GFP_KERNEL); - if (!bd) - goto no_mem; - pdev = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO); - if (!pdev) { - kfree(bd); - goto no_mem; + if (!bd) { + pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget); + goto err_unregister_pdev_wd; + } + pdev_bd = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO); + if (!pdev_bd) { + pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget); + goto err_kfree_bd; } @@ -84,15 +98,31 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) bd->io.flags = IORESOURCE_IO; bd->io_offset = offset; - platform_device_add_data(pdev, bd, sizeof(*bd)); + if (platform_device_add_data(pdev_bd, bd, sizeof(*bd))) { + pr_warn("xtalk:n%d/%x bridge failed to add platform data.\n", nasid, widget); + goto err_put_pdev_bd; + } + if (platform_device_add(pdev_bd)) { + pr_warn("xtalk:n%d/%x bridge failed to add platform device.\n", nasid, widget); + goto err_put_pdev_bd; + } /* platform_device_add_data() duplicates the data */ kfree(bd); - platform_device_add(pdev); pr_info("xtalk:n%d/%x bridge widget\n", nasid, widget); return; -no_mem: - pr_warn("xtalk:n%d/%x bridge create out of memory\n", 
nasid, widget); +err_put_pdev_bd: + platform_device_put(pdev_bd); +err_kfree_bd: + kfree(bd); +err_unregister_pdev_wd: + platform_device_unregister(pdev_wd); + return; +err_put_pdev_wd: + platform_device_put(pdev_wd); +err_kfree_wd: + kfree(wd); + return; } static int probe_one_port(nasid_t nasid, int widget, int masterwid) From f1d6edeaa8d06e1c4800473a23be356c89fbd7b6 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Tue, 13 Sep 2022 05:25:51 +0100 Subject: [PATCH 0095/1477] ARM: 9244/1: dump: Fix wrong pg_level in walk_pmd() [ Upstream commit 2ccd19b3ffac07cc7e75a2bd1ed779728bb67197 ] After ARM supports p4d page tables, the pg_level for note_page() in walk_pmd() should be 4, not 3, fix it. Fixes: 84e6ffb2c49c ("arm: add support for folded p4d page tables") Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/mm/dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index c18d23a5e5f1..9b9023a92d46 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -342,7 +342,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) addr = start + i * PMD_SIZE; domain = get_domain_name(pmd); if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd)) - note_page(st, addr, 3, pmd_val(*pmd), domain); + note_page(st, addr, 4, pmd_val(*pmd), domain); else walk_pte(st, pmd, addr, domain); From 84837738d406149bb000037b51e5cbc329083c1f Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Fri, 16 Sep 2022 12:10:49 +0100 Subject: [PATCH 0096/1477] ARM: 9247/1: mm: set readonly for MT_MEMORY_RO with ARM_LPAE [ Upstream commit 14ca1a4690750bb54e1049e49f3140ef48958a6e ] MT_MEMORY_RO is introduced by commit 598f0a99fa8a ("ARM: 9210/1: Mark the FDT_FIXED sections as shareable"), which is a readonly memory type for FDT area, but there are some differences between ARM_LPAE and non-ARM_LPAE, we need to setup PMD_SECT_AP2 and L_PMD_SECT_RDONLY for MT_MEMORY_RO when
ARM_LPAE enabled. non-ARM_LPAE 0xff800000-0xffa00000 2M PGD KERNEL ro NX SHD ARM_LPAE 0xff800000-0xffc00000 4M PMD RW NX SHD ARM_LPAE+fix 0xff800000-0xffc00000 4M PMD ro NX SHD Fixes: 598f0a99fa8a ("ARM: 9210/1: Mark the FDT_FIXED sections as shareable") Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/mm/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 86f213f1b44b..0d0c3bf23914 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -300,7 +300,11 @@ static struct mem_type mem_types[] __ro_after_init = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_XN | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, +#ifdef CONFIG_ARM_LPAE + .prot_sect = PMD_TYPE_SECT | L_PMD_SECT_RDONLY | PMD_SECT_AP2, +#else .prot_sect = PMD_TYPE_SECT, +#endif .domain = DOMAIN_KERNEL, }, [MT_ROM] = { From 730191a098d81b89b38149da0a3a1d01149d5893 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 8 Sep 2022 14:54:58 -0700 Subject: [PATCH 0097/1477] objtool: Preserve special st_shndx indexes in elf_update_symbol [ Upstream commit 5141d3a06b2da1731ac82091298b766a1f95d3d8 ] elf_update_symbol fails to preserve the special st_shndx values between [SHN_LORESERVE, SHN_HIRESERVE], which results in it converting SHN_ABS entries into SHN_UNDEF, for example. Explicitly check for the special indexes and ensure these symbols are not marked undefined.
Fixes: ead165fa1042 ("objtool: Fix symbol creation") Signed-off-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Tested-by: Peter Zijlstra (Intel) Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220908215504.3686827-17-samitolvanen@google.com Signed-off-by: Sasha Levin --- tools/objtool/elf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 5aa3b4e76479..a2ea3931e01d 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -578,6 +578,11 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, Elf64_Xword entsize = symtab->sh.sh_entsize; int max_idx, idx = sym->idx; Elf_Scn *s, *t = NULL; + bool is_special_shndx = sym->sym.st_shndx >= SHN_LORESERVE && + sym->sym.st_shndx != SHN_XINDEX; + + if (is_special_shndx) + shndx = sym->sym.st_shndx; s = elf_getscn(elf->elf, symtab->idx); if (!s) { @@ -663,7 +668,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, } /* setup extended section index magic and write the symbol */ - if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) { + if ((shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) || is_special_shndx) { sym->sym.st_shndx = shndx; if (!shndx_data) shndx = 0; From e060c4b9f33c1fca74df26d57a98e784295327e6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 1 Sep 2022 07:27:04 +0200 Subject: [PATCH 0098/1477] nfsd: Fix a memory leak in an error handling path [ Upstream commit fd1ef88049de09bc70d60b549992524cfc0e66ff ] If this memdup_user() call fails, the memory allocated in a previous call a few lines above should be freed. Otherwise it leaks. 
Fixes: 6ee95d1c8991 ("nfsd: add support for upcall version 2") Signed-off-by: Christophe JAILLET Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4recover.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index f9b730c43192..83c4e6883953 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -815,8 +815,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, princhash.data = memdup_user( &ci->cc_princhash.cp_data, princhashlen); - if (IS_ERR_OR_NULL(princhash.data)) + if (IS_ERR_OR_NULL(princhash.data)) { + kfree(name.data); return -EFAULT; + } princhash.len = princhashlen; } else princhash.len = 0; From 08faf07717be0c88b02b5aa45aad2225dfcdd2dc Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Mon, 1 Aug 2022 10:19:30 -0400 Subject: [PATCH 0099/1477] wifi: ath10k: add peer map clean up for peer delete in ath10k_sta_state() [ Upstream commit f020d9570a04df0762a2ac5c50cf1d8c511c9164 ] When peer delete failed in a disconnect operation, use-after-free detected by KFENCE in below log. It is because for each vdev_id and address, it has only one struct ath10k_peer, it is allocated in ath10k_peer_map_event(). When connected to an AP, it has more than one HTT_T2H_MSG_TYPE_PEER_MAP reported from firmware, then the array peer_map of struct ath10k will be set muti-elements to the same ath10k_peer in ath10k_peer_map_event(). When peer delete failed in ath10k_sta_state(), the ath10k_peer will be free for the 1st peer id in array peer_map of struct ath10k, and then use-after-free happened for the 2nd peer id because they map to the same ath10k_peer. 
And clean up all peers in array peer_map for the ath10k_peer, then user-after-free disappeared peer map event log: [ 306.911021] wlan0: authenticate with b0:2a:43:e6:75:0e [ 306.957187] ath10k_pci 0000:01:00.0: mac vdev 0 peer create b0:2a:43:e6:75:0e (new sta) sta 1 / 32 peer 1 / 33 [ 306.957395] ath10k_pci 0000:01:00.0: htt peer map vdev 0 peer b0:2a:43:e6:75:0e id 246 [ 306.957404] ath10k_pci 0000:01:00.0: htt peer map vdev 0 peer b0:2a:43:e6:75:0e id 198 [ 306.986924] ath10k_pci 0000:01:00.0: htt peer map vdev 0 peer b0:2a:43:e6:75:0e id 166 peer unmap event log: [ 435.715691] wlan0: deauthenticating from b0:2a:43:e6:75:0e by local choice (Reason: 3=DEAUTH_LEAVING) [ 435.716802] ath10k_pci 0000:01:00.0: mac vdev 0 peer delete b0:2a:43:e6:75:0e sta ffff990e0e9c2b50 (sta gone) [ 435.717177] ath10k_pci 0000:01:00.0: htt peer unmap vdev 0 peer b0:2a:43:e6:75:0e id 246 [ 435.717186] ath10k_pci 0000:01:00.0: htt peer unmap vdev 0 peer b0:2a:43:e6:75:0e id 198 [ 435.717193] ath10k_pci 0000:01:00.0: htt peer unmap vdev 0 peer b0:2a:43:e6:75:0e id 166 use-after-free log: [21705.888627] wlan0: deauthenticating from d0:76:8f:82:be:75 by local choice (Reason: 3=DEAUTH_LEAVING) [21713.799910] ath10k_pci 0000:01:00.0: failed to delete peer d0:76:8f:82:be:75 for vdev 0: -110 [21713.799925] ath10k_pci 0000:01:00.0: found sta peer d0:76:8f:82:be:75 (ptr 0000000000000000 id 102) entry on vdev 0 after it was supposedly removed [21713.799968] ================================================================== [21713.799991] BUG: KFENCE: use-after-free read in ath10k_sta_state+0x265/0xb8a [ath10k_core] [21713.799991] [21713.799997] Use-after-free read at 0x00000000abe1c75e (in kfence-#69): [21713.800010] ath10k_sta_state+0x265/0xb8a [ath10k_core] [21713.800041] drv_sta_state+0x115/0x677 [mac80211] [21713.800059] __sta_info_destroy_part2+0xb1/0x133 [mac80211] [21713.800076] __sta_info_flush+0x11d/0x162 [mac80211] [21713.800093] ieee80211_set_disassoc+0x12d/0x2f4 [mac80211] 
[21713.800110] ieee80211_mgd_deauth+0x26c/0x29b [mac80211] [21713.800137] cfg80211_mlme_deauth+0x13f/0x1bb [cfg80211] [21713.800153] nl80211_deauthenticate+0xf8/0x121 [cfg80211] [21713.800161] genl_rcv_msg+0x38e/0x3be [21713.800166] netlink_rcv_skb+0x89/0xf7 [21713.800171] genl_rcv+0x28/0x36 [21713.800176] netlink_unicast+0x179/0x24b [21713.800181] netlink_sendmsg+0x3a0/0x40e [21713.800187] sock_sendmsg+0x72/0x76 [21713.800192] ____sys_sendmsg+0x16d/0x1e3 [21713.800196] ___sys_sendmsg+0x95/0xd1 [21713.800200] __sys_sendmsg+0x85/0xbf [21713.800205] do_syscall_64+0x43/0x55 [21713.800210] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [21713.800213] [21713.800219] kfence-#69: 0x000000009149b0d5-0x000000004c0697fb, size=1064, cache=kmalloc-2k [21713.800219] [21713.800224] allocated by task 13 on cpu 0 at 21705.501373s: [21713.800241] ath10k_peer_map_event+0x7e/0x154 [ath10k_core] [21713.800254] ath10k_htt_t2h_msg_handler+0x586/0x1039 [ath10k_core] [21713.800265] ath10k_htt_htc_t2h_msg_handler+0x12/0x28 [ath10k_core] [21713.800277] ath10k_htc_rx_completion_handler+0x14c/0x1b5 [ath10k_core] [21713.800283] ath10k_pci_process_rx_cb+0x195/0x1df [ath10k_pci] [21713.800294] ath10k_ce_per_engine_service+0x55/0x74 [ath10k_core] [21713.800305] ath10k_ce_per_engine_service_any+0x76/0x84 [ath10k_core] [21713.800310] ath10k_pci_napi_poll+0x49/0x144 [ath10k_pci] [21713.800316] net_rx_action+0xdc/0x361 [21713.800320] __do_softirq+0x163/0x29a [21713.800325] asm_call_irq_on_stack+0x12/0x20 [21713.800331] do_softirq_own_stack+0x3c/0x48 [21713.800337] __irq_exit_rcu+0x9b/0x9d [21713.800342] common_interrupt+0xc9/0x14d [21713.800346] asm_common_interrupt+0x1e/0x40 [21713.800351] ksoftirqd_should_run+0x5/0x16 [21713.800357] smpboot_thread_fn+0x148/0x211 [21713.800362] kthread+0x150/0x15f [21713.800367] ret_from_fork+0x22/0x30 [21713.800370] [21713.800374] freed by task 708 on cpu 1 at 21713.799953s: [21713.800498] ath10k_sta_state+0x2c6/0xb8a [ath10k_core] [21713.800515] 
drv_sta_state+0x115/0x677 [mac80211] [21713.800532] __sta_info_destroy_part2+0xb1/0x133 [mac80211] [21713.800548] __sta_info_flush+0x11d/0x162 [mac80211] [21713.800565] ieee80211_set_disassoc+0x12d/0x2f4 [mac80211] [21713.800581] ieee80211_mgd_deauth+0x26c/0x29b [mac80211] [21713.800598] cfg80211_mlme_deauth+0x13f/0x1bb [cfg80211] [21713.800614] nl80211_deauthenticate+0xf8/0x121 [cfg80211] [21713.800619] genl_rcv_msg+0x38e/0x3be [21713.800623] netlink_rcv_skb+0x89/0xf7 [21713.800628] genl_rcv+0x28/0x36 [21713.800632] netlink_unicast+0x179/0x24b [21713.800637] netlink_sendmsg+0x3a0/0x40e [21713.800642] sock_sendmsg+0x72/0x76 [21713.800646] ____sys_sendmsg+0x16d/0x1e3 [21713.800651] ___sys_sendmsg+0x95/0xd1 [21713.800655] __sys_sendmsg+0x85/0xbf [21713.800659] do_syscall_64+0x43/0x55 [21713.800663] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00288-QCARMSWPZ-1 Fixes: d0eeafad1189 ("ath10k: Clean up peer when sta goes away.") Signed-off-by: Wen Gong Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220801141930.16794-1-quic_wgong@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/mac.c | 54 ++++++++++++++------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index b61cd275fbda..15f02bf23e9b 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -853,11 +853,36 @@ static int ath10k_peer_delete(struct ath10k *ar, u32 vdev_id, const u8 *addr) return 0; } +static void ath10k_peer_map_cleanup(struct ath10k *ar, struct ath10k_peer *peer) +{ + int peer_id, i; + + lockdep_assert_held(&ar->conf_mutex); + + for_each_set_bit(peer_id, peer->peer_ids, + ATH10K_MAX_NUM_PEER_IDS) { + ar->peer_map[peer_id] = NULL; + } + + /* Double check that peer is properly un-referenced from + * the peer_map + */ + for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { + if (ar->peer_map[i] 
== peer) { + ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %pK idx %d)\n", + peer->addr, peer, i); + ar->peer_map[i] = NULL; + } + } + + list_del(&peer->list); + kfree(peer); + ar->num_peers--; +} + static void ath10k_peer_cleanup(struct ath10k *ar, u32 vdev_id) { struct ath10k_peer *peer, *tmp; - int peer_id; - int i; lockdep_assert_held(&ar->conf_mutex); @@ -869,25 +894,7 @@ static void ath10k_peer_cleanup(struct ath10k *ar, u32 vdev_id) ath10k_warn(ar, "removing stale peer %pM from vdev_id %d\n", peer->addr, vdev_id); - for_each_set_bit(peer_id, peer->peer_ids, - ATH10K_MAX_NUM_PEER_IDS) { - ar->peer_map[peer_id] = NULL; - } - - /* Double check that peer is properly un-referenced from - * the peer_map - */ - for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { - if (ar->peer_map[i] == peer) { - ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %pK idx %d)\n", - peer->addr, peer, i); - ar->peer_map[i] = NULL; - } - } - - list_del(&peer->list); - kfree(peer); - ar->num_peers--; + ath10k_peer_map_cleanup(ar, peer); } spin_unlock_bh(&ar->data_lock); } @@ -7470,10 +7477,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, /* Clean up the peer object as well since we * must have failed to do this above. */ - list_del(&peer->list); - ar->peer_map[i] = NULL; - kfree(peer); - ar->num_peers--; + ath10k_peer_map_cleanup(ar, peer); } } spin_unlock_bh(&ar->data_lock); From 75652070667ff6421f2f0eab05834c2e17b2055c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 15 Aug 2022 10:02:27 +0200 Subject: [PATCH 0100/1477] leds: lm3601x: Don't use mutex after it was destroyed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 32f7eed0c763a9b89f6b357ec54b48398fc7b99e ] The mutex might still be in use until the devm cleanup callback devm_led_classdev_flash_release() is called. This only happens some time after lm3601x_remove() completed. 
Fixes: e63a744871a3 ("leds: lm3601x: Convert class registration to device managed") Acked-by: Pavel Machek Signed-off-by: Uwe Kleine-König Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/leds/leds-lm3601x.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/leds/leds-lm3601x.c b/drivers/leds/leds-lm3601x.c index d0e1d4814042..3d1272748201 100644 --- a/drivers/leds/leds-lm3601x.c +++ b/drivers/leds/leds-lm3601x.c @@ -444,8 +444,6 @@ static int lm3601x_remove(struct i2c_client *client) { struct lm3601x_led *led = i2c_get_clientdata(client); - mutex_destroy(&led->lock); - return regmap_update_bits(led->regmap, LM3601X_ENABLE_REG, LM3601X_ENABLE_MASK, LM3601X_MODE_STANDBY); From 61905bbb6116686d04e384265eef9e4ecd4f78a2 Mon Sep 17 00:00:00 2001 From: Hari Chandrakanthan Date: Wed, 27 Jul 2022 12:02:29 +0530 Subject: [PATCH 0101/1477] wifi: mac80211: allow bw change during channel switch in mesh [ Upstream commit 6b75f133fe05c36c52d691ff21545d5757fff721 ] From 'IEEE Std 802.11-2020 section 11.8.8.4.1': The mesh channel switch may be triggered by the need to avoid interference to a detected radar signal, or to reassign mesh STA channels to ensure the MBSS connectivity. A 20/40 MHz MBSS may be changed to a 20 MHz MBSS and a 20 MHz MBSS may be changed to a 20/40 MHz MBSS. Since the standard allows the change of bandwidth during the channel switch in mesh, remove the bandwidth check present in ieee80211_set_csa_beacon. 
Fixes: c6da674aff94 ("{nl,cfg,mac}80211: enable the triggering of CSA frame in mesh") Signed-off-by: Hari Chandrakanthan Link: https://lore.kernel.org/r/1658903549-21218-1-git-send-email-quic_haric@quicinc.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/cfg.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8010967a6874..c6a7f1c99abc 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3357,9 +3357,6 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_MESH_POINT: { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - if (params->chandef.width != sdata->vif.bss_conf.chandef.width) - return -EINVAL; - /* changes into another band are not supported */ if (sdata->vif.bss_conf.chandef.chan->band != params->chandef.chan->band) From 2afb93e4e4166a9b4022db58689986893a4c3653 Mon Sep 17 00:00:00 2001 From: Lam Thai Date: Wed, 24 Aug 2022 15:59:00 -0700 Subject: [PATCH 0102/1477] bpftool: Fix a wrong type cast in btf_dumper_int [ Upstream commit 7184aef9c0f7a81db8fd18d183ee42481d89bf35 ] When `data` points to a boolean value, casting it to `int *` is problematic and could lead to a wrong value being passed to `jsonw_bool`. Change the cast to `bool *` instead. 
Fixes: b12d6ec09730 ("bpf: btf: add btf print functionality") Signed-off-by: Lam Thai Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20220824225859.9038-1-lamthai@arista.com Signed-off-by: Sasha Levin --- tools/bpf/bpftool/btf_dumper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 0e9310727281..13be48763199 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -416,7 +416,7 @@ static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset, *(char *)data); break; case BTF_INT_BOOL: - jsonw_bool(jw, *(int *)data); + jsonw_bool(jw, *(bool *)data); break; default: /* shouldn't happen */ From 029a1de92ce225fe5ee0cd456352167e66da5a91 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 27 Aug 2022 13:42:07 +0200 Subject: [PATCH 0103/1477] spi: mt7621: Fix an error message in mt7621_spi_probe() [ Upstream commit 2b2bf6b7faa9010fae10dc7de76627a3fdb525b3 ] 'status' is known to be 0 at this point. The expected error code is PTR_ERR(clk). Switch to dev_err_probe() in order to display the expected error code (in a human readable way). This also filters -EPROBE_DEFER cases, should it happen. 
Fixes: 1ab7f2a43558 ("staging: mt7621-spi: add mt7621 support") Signed-off-by: Christophe JAILLET Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/928f3fb507d53ba0774df27cea0bbba4b055993b.1661599671.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-mt7621.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-mt7621.c b/drivers/spi/spi-mt7621.c index b4b9b7309b5e..351b0ef52bbc 100644 --- a/drivers/spi/spi-mt7621.c +++ b/drivers/spi/spi-mt7621.c @@ -340,11 +340,9 @@ static int mt7621_spi_probe(struct platform_device *pdev) return PTR_ERR(base); clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "unable to get SYS clock, err=%d\n", - status); - return PTR_ERR(clk); - } + if (IS_ERR(clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(clk), + "unable to get SYS clock\n"); status = clk_prepare_enable(clk); if (status) From 294395caacf19c7017b3ac5404fd2791ab5f73b6 Mon Sep 17 00:00:00 2001 From: Kohei Tarumizu Date: Wed, 24 Aug 2022 09:44:10 -0700 Subject: [PATCH 0104/1477] x86/resctrl: Fix to restore to original value when re-enabling hardware prefetch register [ Upstream commit 499c8bb4693d1c8d8f3d6dd38e5bdde3ff5bd906 ] The current pseudo_lock.c code overwrites the value of the MSR_MISC_FEATURE_CONTROL to 0 even if the original value is not 0. Therefore, modify it to save and restore the original values. 
Fixes: 018961ae5579 ("x86/intel_rdt: Pseudo-lock region creation/removal core") Fixes: 443810fe6160 ("x86/intel_rdt: Create debugfs files for pseudo-locking testing") Fixes: 8a2fc0e1bc0c ("x86/intel_rdt: More precise L2 hit/miss measurements") Signed-off-by: Kohei Tarumizu Signed-off-by: Dave Hansen Acked-by: Reinette Chatre Link: https://lkml.kernel.org/r/eb660f3c2010b79a792c573c02d01e8e841206ad.1661358182.git.reinette.chatre@intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 0daf2f1cf7a8..465dce141bfc 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -416,6 +416,7 @@ static int pseudo_lock_fn(void *_rdtgrp) struct pseudo_lock_region *plr = rdtgrp->plr; u32 rmid_p, closid_p; unsigned long i; + u64 saved_msr; #ifdef CONFIG_KASAN /* * The registers used for local register variables are also used @@ -459,6 +460,7 @@ static int pseudo_lock_fn(void *_rdtgrp) * the buffer and evict pseudo-locked memory read earlier from the * cache. 
*/ + saved_msr = __rdmsr(MSR_MISC_FEATURE_CONTROL); __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0); closid_p = this_cpu_read(pqr_state.cur_closid); rmid_p = this_cpu_read(pqr_state.cur_rmid); @@ -510,7 +512,7 @@ static int pseudo_lock_fn(void *_rdtgrp) __wrmsr(IA32_PQR_ASSOC, rmid_p, closid_p); /* Re-enable the hardware prefetcher(s) */ - wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0); + wrmsrl(MSR_MISC_FEATURE_CONTROL, saved_msr); local_irq_enable(); plr->thread_done = 1; @@ -867,6 +869,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) static int measure_cycles_lat_fn(void *_plr) { struct pseudo_lock_region *plr = _plr; + u32 saved_low, saved_high; unsigned long i; u64 start, end; void *mem_r; @@ -875,6 +878,7 @@ static int measure_cycles_lat_fn(void *_plr) /* * Disable hardware prefetchers. */ + rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0); mem_r = READ_ONCE(plr->kmem); /* @@ -891,7 +895,7 @@ static int measure_cycles_lat_fn(void *_plr) end = rdtsc_ordered(); trace_pseudo_lock_mem_latency((u32)(end - start)); } - wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0); + wrmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); local_irq_enable(); plr->thread_done = 1; wake_up_interruptible(&plr->lock_thread_wq); @@ -936,6 +940,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0; struct perf_event *miss_event, *hit_event; int hit_pmcnum, miss_pmcnum; + u32 saved_low, saved_high; unsigned int line_size; unsigned int size; unsigned long i; @@ -969,6 +974,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, /* * Disable hardware prefetchers. 
*/ + rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0); /* Initialize rest of local variables */ @@ -1027,7 +1033,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, */ rmb(); /* Re-enable hardware prefetchers */ - wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0); + wrmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); local_irq_enable(); out_hit: perf_event_release_kernel(hit_event); From cdadf95435ff464a97035b63bc220b9940df6b7e Mon Sep 17 00:00:00 2001 From: Mark Chen Date: Tue, 2 Feb 2021 18:26:17 +0800 Subject: [PATCH 0105/1477] Bluetooth: btusb: Fine-tune mt7663 mechanism. [ Upstream commit 48c13301e6baba5fd0960b412af519c0baa98011 ] Fine-tune read register for mt7663/mt7921. For the MediaTek chip-specific WMT protocol, we add more delay to send EP0 In-Token. Signed-off-by: Mark Chen Signed-off-by: Marcel Holtmann Stable-dep-of: fd3f106677ba ("Bluetooth: btusb: mediatek: fix WMT failure during runtime suspend") Signed-off-by: Sasha Levin --- drivers/bluetooth/btusb.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a699e6166aef..eb6e33d168d8 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2816,6 +2816,7 @@ enum { enum { BTMTK_WMT_INVALID, BTMTK_WMT_PATCH_UNDONE, + BTMTK_WMT_PATCH_PROGRESS, BTMTK_WMT_PATCH_DONE, BTMTK_WMT_ON_UNDONE, BTMTK_WMT_ON_DONE, @@ -2831,7 +2832,7 @@ struct btmtk_wmt_hdr { struct btmtk_hci_wmt_cmd { struct btmtk_wmt_hdr hdr; - u8 data[256]; + u8 data[1000]; } __packed; struct btmtk_hci_wmt_evt { @@ -2934,7 +2935,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) * to generate the event. Otherwise, the WMT event cannot return from * the device successfully.
*/ - udelay(100); + udelay(500); usb_anchor_urb(urb, &data->ctrl_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); @@ -3238,9 +3239,9 @@ err_free_buf: return err; } -static int btusb_mtk_id_get(struct btusb_data *data, u32 *id) +static int btusb_mtk_id_get(struct btusb_data *data, u32 reg, u32 *id) { - return btusb_mtk_reg_read(data, 0x80000008, id); + return btusb_mtk_reg_read(data, reg, id); } static int btusb_mtk_setup(struct hci_dev *hdev) @@ -3258,7 +3259,7 @@ static int btusb_mtk_setup(struct hci_dev *hdev) calltime = ktime_get(); - err = btusb_mtk_id_get(data, &dev_id); + err = btusb_mtk_id_get(data, 0x80000008, &dev_id); if (err < 0) { bt_dev_err(hdev, "Failed to get device id (%d)", err); return err; From 07194ccbb14c97e8bf3d52a417fabfe1bf32b6db Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Feb 2021 16:47:07 +0100 Subject: [PATCH 0106/1477] Bluetooth: btusb: fix excessive stack usage [ Upstream commit 10888140f09c3472146dc206accd0cfa051d0ed4 ] Enlarging the size of 'struct btmtk_hci_wmt_cmd' makes it no longer fit on the kernel stack, as seen from this compiler warning: drivers/bluetooth/btusb.c:3365:12: error: stack frame size of 1036 bytes in function 'btusb_mtk_hci_wmt_sync' [-Werror,-Wframe-larger-than=] Change the function to dynamically allocate the buffer instead. As there are other sleeping functions called from the same location, using GFP_KERNEL should be fine here, and the runtime overhead should not matter as this is rarely called. Unfortunately, I could not figure out why the message size is increased in the previous patch. Using dynamic allocation means any size is possible now, but there is still a range check that limits the total size (including the five-byte header) to 255 bytes, so whatever was intended there is now undone. 
Fixes: 48c13301e6ba ("Bluetooth: btusb: Fine-tune mt7663 mechanism.") Signed-off-by: Arnd Bergmann Signed-off-by: Marcel Holtmann Stable-dep-of: fd3f106677ba ("Bluetooth: btusb: mediatek: fix WMT failure during runtime suspend") Signed-off-by: Sasha Levin --- drivers/bluetooth/btusb.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index eb6e33d168d8..80a3d5019950 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2832,7 +2832,7 @@ struct btmtk_wmt_hdr { struct btmtk_hci_wmt_cmd { struct btmtk_wmt_hdr hdr; - u8 data[1000]; + u8 data[]; } __packed; struct btmtk_hci_wmt_evt { @@ -3011,7 +3011,7 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, struct btmtk_hci_wmt_evt_funcc *wmt_evt_funcc; u32 hlen, status = BTMTK_WMT_INVALID; struct btmtk_hci_wmt_evt *wmt_evt; - struct btmtk_hci_wmt_cmd wc; + struct btmtk_hci_wmt_cmd *wc; struct btmtk_wmt_hdr *hdr; int err; @@ -3020,20 +3020,24 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, if (hlen > 255) return -EINVAL; - hdr = (struct btmtk_wmt_hdr *)&wc; + wc = kzalloc(hlen, GFP_KERNEL); + if (!wc) + return -ENOMEM; + + hdr = &wc->hdr; hdr->dir = 1; hdr->op = wmt_params->op; hdr->dlen = cpu_to_le16(wmt_params->dlen + 1); hdr->flag = wmt_params->flag; - memcpy(wc.data, wmt_params->data, wmt_params->dlen); + memcpy(wc->data, wmt_params->data, wmt_params->dlen); set_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); - err = __hci_cmd_send(hdev, 0xfc6f, hlen, &wc); + err = __hci_cmd_send(hdev, 0xfc6f, hlen, wc); if (err < 0) { clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); - return err; + goto err_free_wc; } /* Submit control IN URB on demand to process the WMT event */ @@ -3055,13 +3059,14 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, if (err == -EINTR) { bt_dev_err(hdev, "Execution of wmt command interrupted"); clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); - return err; + goto 
err_free_wc; } if (err) { bt_dev_err(hdev, "Execution of wmt command timed out"); clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); - return -ETIMEDOUT; + err = -ETIMEDOUT; + goto err_free_wc; } /* Parse and handle the return WMT event */ @@ -3097,7 +3102,8 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, err_free_skb: kfree_skb(data->evt_skb); data->evt_skb = NULL; - +err_free_wc: + kfree(wc); return err; } From ea1b6b54098ce4e6203d806c1f5a526db6e42f68 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 11 Aug 2022 08:49:07 +0800 Subject: [PATCH 0107/1477] Bluetooth: btusb: mediatek: fix WMT failure during runtime suspend [ Upstream commit fd3f106677bac70437dc12e76c827294ed495a44 ] WMT cmd/event doesn't follow up the generic HCI cmd/event handling, it needs constantly polling control pipe until the host received the WMT event, thus, we should require to specifically acquire PM counter on the USB to prevent the interface from entering auto suspended while WMT cmd/event in progress. Fixes: a1c49c434e15 ("Bluetooth: btusb: Add protocol support for MediaTek MT7668U USB devices") Co-developed-by: Jing Cai Signed-off-by: Jing Cai Signed-off-by: Sean Wang Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btusb.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 80a3d5019950..6efd981979bd 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3033,15 +3033,29 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, set_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); + /* WMT cmd/event doesn't follow up the generic HCI cmd/event handling, + * it needs constantly polling control pipe until the host received the + * WMT event, thus, we should require to specifically acquire PM counter + * on the USB to prevent the interface from entering auto suspended + * while WMT cmd/event in progress. 
+ */ + err = usb_autopm_get_interface(data->intf); + if (err < 0) + goto err_free_wc; + err = __hci_cmd_send(hdev, 0xfc6f, hlen, wc); if (err < 0) { clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); + usb_autopm_put_interface(data->intf); goto err_free_wc; } /* Submit control IN URB on demand to process the WMT event */ err = btusb_mtk_submit_wmt_recv_urb(hdev); + + usb_autopm_put_interface(data->intf); + if (err < 0) return err; From c823df067941898e3ee77bf632e0c9d110d7e5b2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 19 Aug 2022 08:22:32 +0300 Subject: [PATCH 0108/1477] wifi: rtl8xxxu: tighten bounds checking in rtl8xxxu_read_efuse() [ Upstream commit 620d5eaeb9059636864bda83ca1c68c20ede34a5 ] There is some bounds checking to ensure that "map_addr" is not out of bounds before the start of the loop. But the checking needs to be done as we iterate through the loop because "map_addr" gets larger as we iterate. Fixes: 26f1fad29ad9 ("New driver: rtl8xxxu (mac80211)") Signed-off-by: Dan Carpenter Acked-by: Jes Sorensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/Yv8eGLdBslLAk3Ct@kili Signed-off-by: Sasha Levin --- .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 0d374a294840..9f646964055d 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -1874,13 +1874,6 @@ static int rtl8xxxu_read_efuse(struct rtl8xxxu_priv *priv) /* We have 8 bits to indicate validity */ map_addr = offset * 8; - if (map_addr >= EFUSE_MAP_LEN) { - dev_warn(dev, "%s: Illegal map_addr (%04x), " - "efuse corrupt!\n", - __func__, map_addr); - ret = -EINVAL; - goto exit; - } for (i = 0; i < EFUSE_MAX_WORD_UNIT; i++) { /* Check word enable condition in the section */ if (word_mask & BIT(i)) { @@ -1891,6 +1884,13 @@
static int rtl8xxxu_read_efuse(struct rtl8xxxu_priv *priv) ret = rtl8xxxu_read_efuse8(priv, efuse_addr++, &val8); if (ret) goto exit; + if (map_addr >= EFUSE_MAP_LEN - 1) { + dev_warn(dev, "%s: Illegal map_addr (%04x), " + "efuse corrupt!\n", + __func__, map_addr); + ret = -EINVAL; + goto exit; + } priv->efuse_wifi.raw[map_addr++] = val8; ret = rtl8xxxu_read_efuse8(priv, efuse_addr++, &val8); From 557600830515199bc4ec31ca53e17791e4100d91 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 1 Sep 2022 13:26:45 -0700 Subject: [PATCH 0109/1477] selftests/xsk: Avoid use-after-free on ctx [ Upstream commit af515a5587b8f45f19e11657746e0c89411b0380 ] The put lowers the reference count to 0 and frees ctx, reading it afterwards is invalid. Move the put after the uses and determine the last use by the reference count being 1. Fixes: 39e940d4abfa ("selftests/xsk: Destroy BPF resources only when ctx refcount drops to 0") Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20220901202645.1463552-1-irogers@google.com Signed-off-by: Sasha Levin --- tools/lib/bpf/xsk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index e8745f646371..fa1f8faf7dfe 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -930,13 +930,13 @@ void xsk_socket__delete(struct xsk_socket *xsk) ctx = xsk->ctx; umem = ctx->umem; - xsk_put_ctx(ctx, true); - - if (!ctx->refcount) { + if (ctx->refcount == 1) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); } + xsk_put_ctx(ctx, true); + err = xsk_get_mmap_offsets(xsk->fd, &off); if (!err) { if (xsk->rx) { From 7b83d11d48ffe2bc451b75f5a83154d720097b9e Mon Sep 17 00:00:00 2001 From: Xu Qiang Date: Thu, 25 Aug 2022 06:53:23 +0000 Subject: [PATCH 0110/1477] spi: qup: add missing clk_disable_unprepare on error in spi_qup_resume() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream 
commit 70034320fdc597b8f58b4a43bb547f17c4c5557a ] Add the missing clk_disable_unprepare() before return from spi_qup_resume() in the error handling case. Fixes: 64ff247a978f (“spi: Add Qualcomm QUP SPI controller support”) Signed-off-by: Xu Qiang Link: https://lore.kernel.org/r/20220825065324.68446-1-xuqiang36@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-qup.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index d39dec6d1c91..668d79922fac 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -1246,14 +1246,25 @@ static int spi_qup_resume(struct device *device) return ret; ret = clk_prepare_enable(controller->cclk); - if (ret) + if (ret) { + clk_disable_unprepare(controller->iclk); return ret; + } ret = spi_qup_set_state(controller, QUP_STATE_RESET); if (ret) - return ret; + goto disable_clk; - return spi_master_resume(master); + ret = spi_master_resume(master); + if (ret) + goto disable_clk; + + return 0; + +disable_clk: + clk_disable_unprepare(controller->cclk); + clk_disable_unprepare(controller->iclk); + return ret; } #endif /* CONFIG_PM_SLEEP */ From 1e911790576fec000a185b64a2810376d03eec4d Mon Sep 17 00:00:00 2001 From: Xu Qiang Date: Thu, 25 Aug 2022 06:53:24 +0000 Subject: [PATCH 0111/1477] spi: qup: add missing clk_disable_unprepare on error in spi_qup_pm_resume_runtime() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 494a22765ce479c9f8ad181c5d24cffda9f534bb ] Add the missing clk_disable_unprepare() before return from spi_qup_pm_resume_runtime() in the error handling case. 
Fixes: dae1a7700b34 (“spi: qup: Handle clocks in pm_runtime suspend and resume”) Signed-off-by: Xu Qiang Link: https://lore.kernel.org/r/20220825065324.68446-2-xuqiang36@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-qup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index 668d79922fac..f3877eeb3da6 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -1199,8 +1199,10 @@ static int spi_qup_pm_resume_runtime(struct device *device) return ret; ret = clk_prepare_enable(controller->cclk); - if (ret) + if (ret) { + clk_disable_unprepare(controller->iclk); return ret; + } /* Disable clocks auto gaiting */ config = readl_relaxed(controller->base + QUP_CONFIG); From 351cf55595d3e6636e6d302657eef258dafef70c Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Wed, 31 Aug 2022 19:12:36 +0300 Subject: [PATCH 0112/1477] wifi: rtl8xxxu: Fix skb misuse in TX queue selection [ Upstream commit edd5747aa12ed61a5ecbfa58d3908623fddbf1e8 ] rtl8xxxu_queue_select() selects the wrong TX queues because it's reading memory from the wrong address. It expects to find ieee80211_hdr at skb->data, but that's not the case after skb_push(). Move the call to rtl8xxxu_queue_select() before the call to skb_push(). 
Fixes: 26f1fad29ad9 ("New driver: rtl8xxxu (mac80211)") Signed-off-by: Bitterblue Smith Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/7fa4819a-4f20-b2af-b7a6-8ee01ac49295@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 9f646964055d..e8b4544b5b15 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4984,6 +4984,8 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, if (control && control->sta) sta = control->sta; + queue = rtl8xxxu_queue_select(hw, skb); + tx_desc = skb_push(skb, tx_desc_size); memset(tx_desc, 0, tx_desc_size); @@ -4996,7 +4998,6 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, is_broadcast_ether_addr(ieee80211_get_DA(hdr))) tx_desc->txdw0 |= TXDESC_BROADMULTICAST; - queue = rtl8xxxu_queue_select(hw, skb); tx_desc->txdw1 = cpu_to_le32(queue << TXDESC_QUEUE_SHIFT); if (tx_info->control.hw_key) { From 8398a45d3d72a7389b0a301b34f883e0042b9005 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 8 Sep 2022 14:18:03 +0200 Subject: [PATCH 0113/1477] spi: meson-spicc: do not rely on busy flag in pow2 clk ops [ Upstream commit 36acf80fc0c4b5ebe6fa010b524d442ee7f08fd3 ] Since [1], controller's busy flag isn't set anymore when the __spi_transfer_message_noqueue() is used instead of the __spi_pump_transfer_message() logic for spi_sync transfers. Since the pow2 clock ops were limited to only be available when a transfer is ongoing (between prepare_transfer_hardware and unprepare_transfer_hardware callbacks), the only way to track this down is to check for the controller cur_msg. 
[1] ae7d2346dc89 ("spi: Don't use the message queue if possible in spi_sync") Fixes: 09992025dacd ("spi: meson-spicc: add local pow2 clock ops to preserve rate between messages") Fixes: ae7d2346dc89 ("spi: Don't use the message queue if possible in spi_sync") Reported-by: Markus Schneider-Pargmann Signed-off-by: Neil Armstrong Tested-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/r/20220908121803.919943-1-narmstrong@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-meson-spicc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index e4cb52e1fe26..6974a1c947aa 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -537,7 +537,7 @@ static unsigned long meson_spicc_pow2_recalc_rate(struct clk_hw *hw, struct clk_divider *divider = to_clk_divider(hw); struct meson_spicc_device *spicc = pow2_clk_to_spicc(divider); - if (!spicc->master->cur_msg || !spicc->master->busy) + if (!spicc->master->cur_msg) return 0; return clk_divider_ops.recalc_rate(hw, parent_rate); @@ -549,7 +549,7 @@ static int meson_spicc_pow2_determine_rate(struct clk_hw *hw, struct clk_divider *divider = to_clk_divider(hw); struct meson_spicc_device *spicc = pow2_clk_to_spicc(divider); - if (!spicc->master->cur_msg || !spicc->master->busy) + if (!spicc->master->cur_msg) return -EINVAL; return clk_divider_ops.determine_rate(hw, req); @@ -561,7 +561,7 @@ static int meson_spicc_pow2_set_rate(struct clk_hw *hw, unsigned long rate, struct clk_divider *divider = to_clk_divider(hw); struct meson_spicc_device *spicc = pow2_clk_to_spicc(divider); - if (!spicc->master->cur_msg || !spicc->master->busy) + if (!spicc->master->cur_msg) return -EINVAL; return clk_divider_ops.set_rate(hw, rate, parent_rate); From 0a60ac7a0dadf68169dffad730c3f1ba03d70377 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Sat, 10 Sep 2022 11:01:20 +0000 Subject: [PATCH 0114/1477] 
bpf: btf: fix truncated last_member_type_id in btf_struct_resolve [ Upstream commit a37a32583e282d8d815e22add29bc1e91e19951a ] When trying to finish resolving a struct member, btf_struct_resolve saves the member type id in a u16 temporary variable. This truncates the 32 bit type id value if it exceeds UINT16_MAX. As a result, structs that have members with type ids > UINT16_MAX and which need resolution will fail with a message like this: [67414] STRUCT ff_device size=120 vlen=12 effect_owners type_id=67434 bits_offset=960 Member exceeds struct_size Fix this by changing the type of last_member_type_id to u32. Fixes: a0791f0df7d2 ("bpf: fix BTF limits") Reviewed-by: Stanislav Fomichev Signed-off-by: Lorenz Bauer Link: https://lore.kernel.org/r/20220910110120.339242-1-oss@lmb.io Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index dc497eaf2266..9232938e3f96 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -2913,7 +2913,7 @@ static int btf_struct_resolve(struct btf_verifier_env *env, if (v->next_member) { const struct btf_type *last_member_type; const struct btf_member *last_member; - u16 last_member_type_id; + u32 last_member_type_id; last_member = btf_type_member(v->t) + v->next_member - 1; last_member_type_id = last_member->type; From 226e6f241258186e9e1a1c3fbb88305d001187d1 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Fri, 2 Sep 2022 14:48:32 +0300 Subject: [PATCH 0115/1477] wifi: rtl8xxxu: gen2: Fix mistake in path B IQ calibration [ Upstream commit e963a19c64ac0d2f8785d36a27391abd91ac77aa ] Found by comparing with the vendor driver. Currently this affects only the RTL8192EU, which is the only gen2 chip with 2 TX paths supported by this driver. It's unclear what kind of effect the mistake had in practice, since I don't have any RTL8192EU devices to test it. 
Fixes: e1547c535ede ("rtl8xxxu: First stab at adding IQK calibration for 8723bu parts") Signed-off-by: Bitterblue Smith Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/30a59f3a-cfa9-8379-7af0-78a8f4c77cfd@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index e8b4544b5b15..8668b03bd8c7 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -2925,12 +2925,12 @@ bool rtl8xxxu_gen2_simularity_compare(struct rtl8xxxu_priv *priv, } if (!(simubitmap & 0x30) && priv->tx_paths > 1) { - /* path B RX OK */ + /* path B TX OK */ for (i = 4; i < 6; i++) result[3][i] = result[c1][i]; } - if (!(simubitmap & 0x30) && priv->tx_paths > 1) { + if (!(simubitmap & 0xc0) && priv->tx_paths > 1) { /* path B RX OK */ for (i = 6; i < 8; i++) result[3][i] = result[c1][i]; From 795954d75197e085e6287d46a73413cb962c81e8 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Fri, 2 Sep 2022 16:15:30 +0300 Subject: [PATCH 0116/1477] wifi: rtl8xxxu: Remove copy-paste leftover in gen2_update_rate_mask [ Upstream commit d5350756c03cdf18696295c6b11d7acc4dbf825c ] It looks like a leftover from copying rtl8xxxu_update_rate_mask, which is used with the gen1 chips. It wasn't causing any problems for my RTL8188FU test device, but it's clearly a mistake, so remove it. 
Fixes: f653e69009c6 ("rtl8xxxu: Implement basic 8723b specific update_rate_mask() function") Signed-off-by: Bitterblue Smith Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/d5544fe8-9798-28f1-54bd-6839a1974b10@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 8668b03bd8c7..7818a7ea0498 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4338,15 +4338,14 @@ void rtl8xxxu_gen2_update_rate_mask(struct rtl8xxxu_priv *priv, h2c.b_macid_cfg.ramask2 = (ramask >> 16) & 0xff; h2c.b_macid_cfg.ramask3 = (ramask >> 24) & 0xff; - h2c.ramask.arg = 0x80; h2c.b_macid_cfg.data1 = rateid; if (sgi) h2c.b_macid_cfg.data1 |= BIT(7); h2c.b_macid_cfg.data2 = bw; - dev_dbg(&priv->udev->dev, "%s: rate mask %08x, arg %02x, size %zi\n", - __func__, ramask, h2c.ramask.arg, sizeof(h2c.b_macid_cfg)); + dev_dbg(&priv->udev->dev, "%s: rate mask %08x, rateid %02x, sgi %d, size %zi\n", + __func__, ramask, rateid, sgi, sizeof(h2c.b_macid_cfg)); rtl8xxxu_gen2_h2c_cmd(priv, &h2c, sizeof(h2c.b_macid_cfg)); } From 2a1c29dc9b7e8c5dbc96ee3c20bb620984b69474 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Thu, 8 Sep 2022 13:55:13 +0000 Subject: [PATCH 0117/1477] net: fs_enet: Fix wrong check in do_pd_setup [ Upstream commit ec3f06b542a960806a81345042e4eee3f8c5dec4 ] Should check of_iomap return value 'fep->fec.fecp' instead of 'fep->fcc.fccp' Fixes: 976de6a8c304 ("fs_enet: Be an of_platform device when CONFIG_PPC_CPM_NEW_BINDING is set.") Signed-off-by: Zheng Yongjun Reviewed-by: Christophe Leroy Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 99fe2c210d0f..61f4b6e50d29 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -98,7 +98,7 @@ static int do_pd_setup(struct fs_enet_private *fep) return -EINVAL; fep->fec.fecp = of_iomap(ofdev->dev.of_node, 0); - if (!fep->fcc.fccp) + if (!fep->fec.fecp) return -EINVAL; return 0; From ed403bcd979d5be20edfbc98b5c7bcce1b2a5c33 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 12 Sep 2022 14:38:55 +0100 Subject: [PATCH 0118/1477] bpf: Ensure correct locking around vulnerable function find_vpid() [ Upstream commit 83c10cc362d91c0d8d25e60779ee52fdbbf3894d ] The documentation for find_vpid() clearly states: "Must be called with the tasklist_lock or rcu_read_lock() held." Presently we do neither for find_vpid() instance in bpf_task_fd_query(). Add proper rcu_read_lock/unlock() to fix the issue. 
Fixes: 41bdc4b40ed6f ("bpf: introduce bpf subcommand BPF_TASK_FD_QUERY") Signed-off-by: Lee Jones Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220912133855.1218900-1-lee@kernel.org Signed-off-by: Sasha Levin --- kernel/bpf/syscall.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 419dbc3d060e..aaad2dce2be6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3915,7 +3915,9 @@ static int bpf_task_fd_query(const union bpf_attr *attr, if (attr->task_fd_query.flags != 0) return -EINVAL; + rcu_read_lock(); task = get_pid_task(find_vpid(pid), PIDTYPE_PID); + rcu_read_unlock(); if (!task) return -ENOENT; From d7cc0d51ffcbfd1caaa809fcf9cff05c46d0fb4d Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 29 Aug 2022 23:58:12 +0900 Subject: [PATCH 0119/1477] Bluetooth: hci_{ldisc,serdev}: check percpu_init_rwsem() failure [ Upstream commit 3124d320c22f3f4388d9ac5c8f37eaad0cefd6b1 ] syzbot is reporting NULL pointer dereference at hci_uart_tty_close() [1], for rcu_sync_enter() is called without rcu_sync_init() due to hci_uart_tty_open() ignoring percpu_init_rwsem() failure. While we are at it, fix that hci_uart_register_device() ignores percpu_init_rwsem() failure and hci_uart_unregister_device() does not call percpu_free_rwsem(). 
Link: https://syzkaller.appspot.com/bug?extid=576dfca25381fb6fbc5f [1] Reported-by: syzbot Signed-off-by: Tetsuo Handa Fixes: 67d2f8781b9f00d1 ("Bluetooth: hci_ldisc: Allow sleeping while proto locks are held.") Fixes: d73e172816652772 ("Bluetooth: hci_serdev: Init hci_uart proto_lock to avoid oops") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_ldisc.c | 7 +++++-- drivers/bluetooth/hci_serdev.c | 10 +++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 637c5b8c2aa1..726d5c83c550 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -490,6 +490,11 @@ static int hci_uart_tty_open(struct tty_struct *tty) BT_ERR("Can't allocate control structure"); return -ENFILE; } + if (percpu_init_rwsem(&hu->proto_lock)) { + BT_ERR("Can't allocate semaphore structure"); + kfree(hu); + return -ENOMEM; + } tty->disc_data = hu; hu->tty = tty; @@ -502,8 +507,6 @@ static int hci_uart_tty_open(struct tty_struct *tty) INIT_WORK(&hu->init_ready, hci_uart_init_work); INIT_WORK(&hu->write_work, hci_uart_write_work); - percpu_init_rwsem(&hu->proto_lock); - /* Flush any pending characters in the driver */ tty_driver_flush_buffer(tty); diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index e9a44ab3812d..f2e2e553d4de 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -301,11 +301,12 @@ int hci_uart_register_device(struct hci_uart *hu, serdev_device_set_client_ops(hu->serdev, &hci_serdev_client_ops); + if (percpu_init_rwsem(&hu->proto_lock)) + return -ENOMEM; + err = serdev_device_open(hu->serdev); if (err) - return err; - - percpu_init_rwsem(&hu->proto_lock); + goto err_rwsem; err = p->open(hu); if (err) @@ -375,6 +376,8 @@ err_alloc: p->close(hu); err_open: serdev_device_close(hu->serdev); +err_rwsem: + percpu_free_rwsem(&hu->proto_lock); return err; } 
EXPORT_SYMBOL_GPL(hci_uart_register_device); @@ -396,5 +399,6 @@ void hci_uart_unregister_device(struct hci_uart *hu) clear_bit(HCI_UART_PROTO_READY, &hu->flags); serdev_device_close(hu->serdev); } + percpu_free_rwsem(&hu->proto_lock); } EXPORT_SYMBOL_GPL(hci_uart_unregister_device); From 07299e52e5b9ac580dc16e786804dbd517afde2d Mon Sep 17 00:00:00 2001 From: Jesus Fernandez Manzano Date: Thu, 22 Sep 2022 10:35:14 +0300 Subject: [PATCH 0120/1477] wifi: ath11k: fix number of VHT beamformee spatial streams [ Upstream commit 55b5ee3357d7bb98ee578cf9b84a652e7a1bc199 ] The number of spatial streams used when acting as a beamformee in VHT mode are reported by the firmware as 7 (8 sts - 1) both in IPQ6018 and IPQ8074 which respectively have 2 and 4 sts each. So the firmware should report 1 (2 - 1) and 3 (4 - 1). Fix this by checking that the number of VHT beamformee sts reported by the firmware is not greater than the number of receiving antennas - 1. The fix is based on the same approach used in this same function for sanitizing the number of sounding dimensions reported by the firmware. Without this change, acting as a beamformee in VHT mode is not working properly. 
Tested-on: IPQ6018 hw1.0 AHB WLAN.HK.2.5.0.1-01208-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.5.0.1-01208-QCAHKSWPL_SILICONZ-1 Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Signed-off-by: Jesus Fernandez Manzano Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220616173947.21901-1-jesus.manzano@galgus.net Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/mac.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 44282aec069d..67faf62999de 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -3419,6 +3419,8 @@ static int ath11k_mac_set_txbf_conf(struct ath11k_vif *arvif) if (vht_cap & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE)) { nsts = vht_cap & IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK; nsts >>= IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT; + if (nsts > (ar->num_rx_chains - 1)) + nsts = ar->num_rx_chains - 1; value |= SM(nsts, WMI_TXBF_STS_CAP_OFFSET); } @@ -3459,7 +3461,7 @@ static int ath11k_mac_set_txbf_conf(struct ath11k_vif *arvif) static void ath11k_set_vht_txbf_cap(struct ath11k *ar, u32 *vht_cap) { bool subfer, subfee; - int sound_dim = 0; + int sound_dim = 0, nsts = 0; subfer = !!(*vht_cap & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)); subfee = !!(*vht_cap & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE)); @@ -3469,6 +3471,11 @@ static void ath11k_set_vht_txbf_cap(struct ath11k *ar, u32 *vht_cap) subfer = false; } + if (ar->num_rx_chains < 2) { + *vht_cap &= ~(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE); + subfee = false; + } + /* If SU Beaformer is not set, then disable MU Beamformer Capability */ if (!subfer) *vht_cap &= ~(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE); @@ -3481,7 +3488,9 @@ static void ath11k_set_vht_txbf_cap(struct ath11k *ar, u32 *vht_cap) sound_dim >>= IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT; *vht_cap &= 
~IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK; - /* TODO: Need to check invalid STS and Sound_dim values set by FW? */ + nsts = (*vht_cap & IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK); + nsts >>= IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT; + *vht_cap &= ~IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK; /* Enable Sounding Dimension Field only if SU BF is enabled */ if (subfer) { @@ -3493,9 +3502,15 @@ static void ath11k_set_vht_txbf_cap(struct ath11k *ar, u32 *vht_cap) *vht_cap |= sound_dim; } - /* Use the STS advertised by FW unless SU Beamformee is not supported*/ - if (!subfee) - *vht_cap &= ~(IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK); + /* Enable Beamformee STS Field only if SU BF is enabled */ + if (subfee) { + if (nsts > (ar->num_rx_chains - 1)) + nsts = ar->num_rx_chains - 1; + + nsts <<= IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT; + nsts &= IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK; + *vht_cap |= nsts; + } } static struct ieee80211_sta_vht_cap From 6ed7b05a3592e96023989417f617f80a5e25dedd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 Sep 2022 20:10:10 -0700 Subject: [PATCH 0121/1477] x86/microcode/AMD: Track patch allocation size explicitly [ Upstream commit 712f210a457d9c32414df246a72781550bc23ef6 ] In preparation for reducing the use of ksize(), record the actual allocation size for later memcpy(). This avoids copying extra (uninitialized!) bytes into the patch buffer when the requested allocation size isn't exactly the size of a kmalloc bucket. Additionally, fix potential future issues where runtime bounds checking will notice that the buffer was allocated to a smaller value than returned by ksize(). 
Fixes: 757885e94a22 ("x86, microcode, amd: Early microcode patch loading support for AMD") Suggested-by: Daniel Micay Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/lkml/CA+DvKQ+bp7Y7gmaVhacjv9uF6Ar-o4tet872h4Q8RPYPJjcJQA@mail.gmail.com/ Signed-off-by: Sasha Levin --- arch/x86/include/asm/microcode.h | 1 + arch/x86/kernel/cpu/microcode/amd.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 91a06cef50c1..f73327397b89 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -9,6 +9,7 @@ struct ucode_patch { struct list_head plist; void *data; /* Intel uses only this one */ + unsigned int size; u32 patch_id; u16 equiv_cpu; }; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 3f6b137ef4e6..c87936441339 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -783,6 +783,7 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover, kfree(patch); return -EINVAL; } + patch->size = *patch_size; mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); proc_id = mc_hdr->processor_rev_id; @@ -864,7 +865,7 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) return ret; memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); - memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE)); + memcpy(amd_ucode_patch, p->data, min_t(u32, p->size, PATCH_MAX_SIZE)); return ret; } From 1ef5798638bd2389c25ff784fb4c64e2cd2ec077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luciano=20Le=C3=A3o?= Date: Thu, 22 Sep 2022 17:00:54 -0300 Subject: [PATCH 0122/1477] x86/cpu: Include the header of init_ia32_feat_ctl()'s prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 30ea703a38ef76ca119673cd8bdd05c6e068e2ac ] Include the header containing the 
prototype of init_ia32_feat_ctl(), solving the following warning: $ make W=1 arch/x86/kernel/cpu/feat_ctl.o arch/x86/kernel/cpu/feat_ctl.c:112:6: warning: no previous prototype for ‘init_ia32_feat_ctl’ [-Wmissing-prototypes] 112 | void init_ia32_feat_ctl(struct cpuinfo_x86 *c) This warning appeared after commit 5d5103595e9e5 ("x86/cpu: Reinitialize IA32_FEAT_CTL MSR on BSP during wakeup") had moved the function init_ia32_feat_ctl()'s prototype from arch/x86/kernel/cpu/cpu.h to arch/x86/include/asm/cpu.h. Note that, before the commit mentioned above, the header include "cpu.h" (arch/x86/kernel/cpu/cpu.h) was added by commit 0e79ad863df43 ("x86/cpu: Fix a -Wmissing-prototypes warning for init_ia32_feat_ctl()") solely to fix init_ia32_feat_ctl()'s missing prototype. So, the header include "cpu.h" is no longer necessary. [ bp: Massage commit message. ] Fixes: 5d5103595e9e5 ("x86/cpu: Reinitialize IA32_FEAT_CTL MSR on BSP during wakeup") Signed-off-by: Luciano Leão Signed-off-by: Borislav Petkov Reviewed-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20220922200053.1357470-1-lucianorsleao@gmail.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/feat_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index 29a3bedabd06..d7541851288e 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include #include -#include "cpu.h" #undef pr_fmt #define pr_fmt(fmt) "x86/cpu: " fmt From 9da61e7b5993ec5442b61254246a4b7944a04262 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 24 Sep 2022 20:13:08 +0800 Subject: [PATCH 0123/1477] spi: dw: Fix PM disable depth imbalance in dw_spi_bt1_probe [ Upstream commit 618d815fc93477b1675878f3c04ff32657cc18b4 ] The pm_runtime_enable will increase power disable depth. 
Thus a pairing decrement is needed on the error handling path to keep it balanced according to context. Fixes: abf00907538e2 ("spi: dw: Add Baikal-T1 SPI Controller glue driver") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20220924121310.78331-3-zhangqilong3@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-dw-bt1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-dw-bt1.c b/drivers/spi/spi-dw-bt1.c index bc9d5eab3c58..8f6a1af14456 100644 --- a/drivers/spi/spi-dw-bt1.c +++ b/drivers/spi/spi-dw-bt1.c @@ -293,8 +293,10 @@ static int dw_spi_bt1_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); ret = dw_spi_add_host(&pdev->dev, dws); - if (ret) + if (ret) { + pm_runtime_disable(&pdev->dev); goto err_disable_clk; + } platform_set_drvdata(pdev, dwsbt1); From 534909fe3c9253dc929158f89c081bce4cfaa5f7 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 24 Sep 2022 20:13:09 +0800 Subject: [PATCH 0124/1477] spi/omap100k:Fix PM disable depth imbalance in omap1_spi100k_probe [ Upstream commit 29f65f2171c85a9633daa380df14009a365f42f2 ] The pm_runtime_enable will increase power disable depth. Thus a pairing decrement is needed on the error handling path to keep it balanced according to context.
Fixes: db91841b58f9a ("spi/omap100k: Convert to runtime PM") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20220924121310.78331-4-zhangqilong3@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-omap-100k.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c index 0d0cd061d356..7c992d1f4abd 100644 --- a/drivers/spi/spi-omap-100k.c +++ b/drivers/spi/spi-omap-100k.c @@ -414,6 +414,7 @@ static int omap1_spi100k_probe(struct platform_device *pdev) return status; err_fck: + pm_runtime_disable(&pdev->dev); clk_disable_unprepare(spi100k->fck); err_ick: clk_disable_unprepare(spi100k->ick); From 129f01116b8c71c632b3b6e0dc9982e887ca2e91 Mon Sep 17 00:00:00 2001 From: Asmaa Mnebhi Date: Mon, 26 Sep 2022 15:45:04 -0400 Subject: [PATCH 0125/1477] i2c: mlxbf: support lock mechanism [ Upstream commit 86067ccfa1424a26491542d6f6d7546d40b61a10 ] Linux is not the only entity using the BlueField I2C busses so support a lock mechanism provided by hardware to avoid issues when multiple entities are trying to access the same bus. The lock is acquired whenever written explicitly or the lock register is read. So make sure it is always released at the end of a successful or failed transaction. Fixes: b5b5b32081cd206b (i2c: mlxbf: I2C SMBus driver for Mellanox BlueField SoC) Reviewed-by: Khalil Blaiech Signed-off-by: Asmaa Mnebhi Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-mlxbf.c | 44 ++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-mlxbf.c b/drivers/i2c/busses/i2c-mlxbf.c index bea82a787b4f..90c488a60693 100644 --- a/drivers/i2c/busses/i2c-mlxbf.c +++ b/drivers/i2c/busses/i2c-mlxbf.c @@ -312,6 +312,7 @@ static u64 mlxbf_i2c_corepll_frequency; * exact.
*/ #define MLXBF_I2C_SMBUS_TIMEOUT (300 * 1000) /* 300ms */ +#define MLXBF_I2C_SMBUS_LOCK_POLL_TIMEOUT (300 * 1000) /* 300ms */ /* Encapsulates timing parameters. */ struct mlxbf_i2c_timings { @@ -520,6 +521,25 @@ static bool mlxbf_smbus_master_wait_for_idle(struct mlxbf_i2c_priv *priv) return false; } +/* + * wait for the lock to be released before acquiring it. + */ +static bool mlxbf_i2c_smbus_master_lock(struct mlxbf_i2c_priv *priv) +{ + if (mlxbf_smbus_poll(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_GW, + MLXBF_I2C_MASTER_LOCK_BIT, true, + MLXBF_I2C_SMBUS_LOCK_POLL_TIMEOUT)) + return true; + + return false; +} + +static void mlxbf_i2c_smbus_master_unlock(struct mlxbf_i2c_priv *priv) +{ + /* Clear the gw to clear the lock */ + writel(0, priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_GW); +} + static bool mlxbf_i2c_smbus_transaction_success(u32 master_status, u32 cause_status) { @@ -711,10 +731,19 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv, slave = request->slave & GENMASK(6, 0); addr = slave << 1; - /* First of all, check whether the HW is idle. */ - if (WARN_ON(!mlxbf_smbus_master_wait_for_idle(priv))) + /* + * Try to acquire the smbus gw lock before any reads of the GW register since + * a read sets the lock. + */ + if (WARN_ON(!mlxbf_i2c_smbus_master_lock(priv))) return -EBUSY; + /* Check whether the HW is idle */ + if (WARN_ON(!mlxbf_smbus_master_wait_for_idle(priv))) { + ret = -EBUSY; + goto out_unlock; + } + /* Set first byte. 
*/ data_desc[data_idx++] = addr; @@ -738,8 +767,10 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv, write_en = 1; write_len += operation->length; if (data_idx + operation->length > - MLXBF_I2C_MASTER_DATA_DESC_SIZE) - return -ENOBUFS; + MLXBF_I2C_MASTER_DATA_DESC_SIZE) { + ret = -ENOBUFS; + goto out_unlock; + } memcpy(data_desc + data_idx, operation->buffer, operation->length); data_idx += operation->length; @@ -771,7 +802,7 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv, ret = mlxbf_i2c_smbus_enable(priv, slave, write_len, block_en, pec_en, 0); if (ret) - return ret; + goto out_unlock; } if (read_en) { @@ -798,6 +829,9 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv, priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_FSM); } +out_unlock: + mlxbf_i2c_smbus_master_unlock(priv); + return ret; } From b384e8fb16068702a087e1d09814784753d7e6e2 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 26 Sep 2022 15:44:42 -0700 Subject: [PATCH 0126/1477] Bluetooth: hci_core: Fix not handling link timeouts propertly [ Upstream commit 116523c8fac05d1d26f748fee7919a4ec5df67ea ] Change that introduced the use of __check_timeout did not account for link types properly, it always assumes ACL_LINK is used thus causing hdev->acl_last_tx to be used even in case of LE_LINK and then again uses ACL_LINK with hci_link_tx_to. To fix this __check_timeout now takes the link type as parameter and then procedure to use the right last_tx based on the link type and pass it to hci_link_tx_to. 
Fixes: 1b1d29e51499 ("Bluetooth: Make use of __check_timeout on hci_sched_le") Signed-off-by: Luiz Augusto von Dentz Tested-by: David Beinder Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2cb0cf035476..866eb22432de 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4482,15 +4482,27 @@ static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb) return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len); } -static void __check_timeout(struct hci_dev *hdev, unsigned int cnt) +static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type) { - if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { - /* ACL tx timeout must be longer than maximum - * link supervision timeout (40.9 seconds) */ - if (!cnt && time_after(jiffies, hdev->acl_last_tx + - HCI_ACL_TX_TIMEOUT)) - hci_link_tx_to(hdev, ACL_LINK); + unsigned long last_tx; + + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) + return; + + switch (type) { + case LE_LINK: + last_tx = hdev->le_last_tx; + break; + default: + last_tx = hdev->acl_last_tx; + break; } + + /* tx timeout must be longer than maximum link supervision timeout + * (40.9 seconds) + */ + if (!cnt && time_after(jiffies, last_tx + HCI_ACL_TX_TIMEOUT)) + hci_link_tx_to(hdev, type); } /* Schedule SCO */ @@ -4548,7 +4560,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) struct sk_buff *skb; int quote; - __check_timeout(hdev, cnt); + __check_timeout(hdev, cnt, ACL_LINK); while (hdev->acl_cnt && (chan = hci_chan_sent(hdev, ACL_LINK, &quote))) { @@ -4591,8 +4603,6 @@ static void hci_sched_acl_blk(struct hci_dev *hdev) int quote; u8 type; - __check_timeout(hdev, cnt); - BT_DBG("%s", hdev->name); if (hdev->dev_type == HCI_AMP) @@ -4600,6 +4610,8 @@ static void hci_sched_acl_blk(struct hci_dev *hdev) else type = ACL_LINK; +
__check_timeout(hdev, cnt, type); + while (hdev->block_cnt > 0 && (chan = hci_chan_sent(hdev, type, &quote))) { u32 priority = (skb_peek(&chan->data_q))->priority; @@ -4673,7 +4685,7 @@ static void hci_sched_le(struct hci_dev *hdev) cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt; - __check_timeout(hdev, cnt); + __check_timeout(hdev, cnt, LE_LINK); tmp = cnt; while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, &quote))) { From b284e1fe15c419717720508b367fe7cbfd8c74af Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 21 Sep 2022 13:07:31 +0200 Subject: [PATCH 0127/1477] netfilter: nft_fib: Fix for rpath check with VRF devices [ Upstream commit 2a8a7c0eaa8747c16aa4a48d573aa920d5c00a5c ] Analogous to commit b575b24b8eee3 ("netfilter: Fix rpfilter dropping vrf packets by mistake") but for nftables fib expression: Add special treatment of VRF devices so that typical reverse path filtering via 'fib saddr . iif oif' expression works as expected. Fixes: f6d0cbcf09c50 ("netfilter: nf_tables: add fib expression") Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/ipv4/netfilter/nft_fib_ipv4.c | 3 +++ net/ipv6/netfilter/nft_fib_ipv6.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 03df986217b7..9e6f0f1275e2 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -83,6 +83,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, else oif = NULL; + if (priv->flags & NFTA_FIB_F_IIF) + fl4.flowi4_oif = l3mdev_master_ifindex_rcu(oif); + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { nft_fib_store_result(dest, priv, nft_in(pkt)); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 92f3235fa287..602743f6dcee 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -37,6 +37,9 @@
static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev); + } else if ((priv->flags & NFTA_FIB_F_IIF) && + (netif_is_l3_master(dev) || netif_is_l3_slave(dev))) { + fl6->flowi6_oif = dev->ifindex; } if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST) @@ -193,7 +196,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) goto put_rt_err; - if (oif && oif != rt->rt6i_idev->dev) + if (oif && oif != rt->rt6i_idev->dev && + l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex) goto put_rt_err; nft_fib_store_result(dest, priv, rt->rt6i_idev->dev); From 17196f2f98abaa0f2b576310b082adfcbbfe97a0 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 27 Sep 2022 13:21:17 +0200 Subject: [PATCH 0128/1477] spi: s3c64xx: Fix large transfers with DMA [ Upstream commit 1224e29572f655facfcd850cf0f0a4784f36a903 ] The COUNT_VALUE in the PACKET_CNT register is 16-bit so the maximum value is 65535. Asking the driver to transfer a larger size currently leads to the DMA transfer timing out. Implement ->max_transfer_size() and have the core split the transfer as needed. 
Fixes: 230d42d422e7 ("spi: Add s3c64xx SPI Controller driver") Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20220927112117.77599-5-vincent.whitchurch@axis.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-s3c64xx.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index dfa7c91e13aa..d435df1b715b 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -84,6 +84,7 @@ #define S3C64XX_SPI_ST_TX_FIFORDY (1<<0) #define S3C64XX_SPI_PACKET_CNT_EN (1<<16) +#define S3C64XX_SPI_PACKET_CNT_MASK GENMASK(15, 0) #define S3C64XX_SPI_PND_TX_UNDERRUN_CLR (1<<4) #define S3C64XX_SPI_PND_TX_OVERRUN_CLR (1<<3) @@ -660,6 +661,13 @@ static int s3c64xx_spi_prepare_message(struct spi_master *master, return 0; } +static size_t s3c64xx_spi_max_transfer_size(struct spi_device *spi) +{ + struct spi_controller *ctlr = spi->controller; + + return ctlr->can_dma ? S3C64XX_SPI_PACKET_CNT_MASK : SIZE_MAX; +} + static int s3c64xx_spi_transfer_one(struct spi_master *master, struct spi_device *spi, struct spi_transfer *xfer) @@ -1135,6 +1143,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev) master->prepare_transfer_hardware = s3c64xx_spi_prepare_transfer; master->prepare_message = s3c64xx_spi_prepare_message; master->transfer_one = s3c64xx_spi_transfer_one; + master->max_transfer_size = s3c64xx_spi_max_transfer_size; master->num_chipselect = sci->num_cs; master->dma_alignment = 8; master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) | From d2b5dc3a53943a9c146d3a438aa26bd0b1fb671d Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Sun, 18 Sep 2022 15:42:25 +0300 Subject: [PATCH 0129/1477] wifi: rtl8xxxu: Fix AIFS written to REG_EDCA_*_PARAM [ Upstream commit 5574d3290449916397f3092dcd2bac92415498e1 ] ieee80211_tx_queue_params.aifs is not supposed to be written directly to the REG_EDCA_*_PARAM registers. 
Instead process it like the vendor drivers do. It's kinda hacky but it works. This change boosts the download speed and makes it more stable. Tested with RTL8188FU but all the other supported chips should also benefit. Fixes: 26f1fad29ad9 ("New driver: rtl8xxxu (mac80211)") Signed-off-by: Bitterblue Smith Acked-by: Jes Sorensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/038cc03f-3567-77ba-a7bd-c4930e3b2fad@gmail.com Signed-off-by: Sasha Levin --- .../wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 7818a7ea0498..e34cd6fed7e8 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4507,6 +4507,53 @@ rtl8xxxu_wireless_mode(struct ieee80211_hw *hw, struct ieee80211_sta *sta) return network_type; } +static void rtl8xxxu_set_aifs(struct rtl8xxxu_priv *priv, u8 slot_time) +{ + u32 reg_edca_param[IEEE80211_NUM_ACS] = { + [IEEE80211_AC_VO] = REG_EDCA_VO_PARAM, + [IEEE80211_AC_VI] = REG_EDCA_VI_PARAM, + [IEEE80211_AC_BE] = REG_EDCA_BE_PARAM, + [IEEE80211_AC_BK] = REG_EDCA_BK_PARAM, + }; + u32 val32; + u16 wireless_mode = 0; + u8 aifs, aifsn, sifs; + int i; + + if (priv->vif) { + struct ieee80211_sta *sta; + + rcu_read_lock(); + sta = ieee80211_find_sta(priv->vif, priv->vif->bss_conf.bssid); + if (sta) + wireless_mode = rtl8xxxu_wireless_mode(priv->hw, sta); + rcu_read_unlock(); + } + + if (priv->hw->conf.chandef.chan->band == NL80211_BAND_5GHZ || + (wireless_mode & WIRELESS_MODE_N_24G)) + sifs = 16; + else + sifs = 10; + + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + val32 = rtl8xxxu_read32(priv, reg_edca_param[i]); + + /* It was set in conf_tx. 
*/ + aifsn = val32 & 0xff; + + /* aifsn not set yet or already fixed */ + if (aifsn < 2 || aifsn > 15) + continue; + + aifs = aifsn * slot_time + sifs; + + val32 &= ~0xff; + val32 |= aifs; + rtl8xxxu_write32(priv, reg_edca_param[i], val32); + } +} + static void rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *bss_conf, u32 changed) @@ -4592,6 +4639,8 @@ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, else val8 = 20; rtl8xxxu_write8(priv, REG_SLOT, val8); + + rtl8xxxu_set_aifs(priv, val8); } if (changed & BSS_CHANGED_BSSID) { From b4a5905fd2ef841cd61e969ea692c213c2e5c1f7 Mon Sep 17 00:00:00 2001 From: Junichi Uekawa Date: Wed, 28 Sep 2022 15:45:38 +0900 Subject: [PATCH 0130/1477] vhost/vsock: Use kvmalloc/kvfree for larger packets. [ Upstream commit 0e3f72931fc47bb81686020cc643cde5d9cd0bb8 ] When copying a large file over sftp over vsock, data size is usually 32kB, and kmalloc seems to fail to try to allocate 32 32kB regions. vhost-5837: page allocation failure: order:4, mode:0x24040c0 Call Trace: [] dump_stack+0x97/0xdb [] warn_alloc_failed+0x10f/0x138 [] ? __alloc_pages_direct_compact+0x38/0xc8 [] __alloc_pages_nodemask+0x84c/0x90d [] alloc_kmem_pages+0x17/0x19 [] kmalloc_order_trace+0x2b/0xdb [] __kmalloc+0x177/0x1f7 [] ? copy_from_iter+0x8d/0x31d [] vhost_vsock_handle_tx_kick+0x1fa/0x301 [vhost_vsock] [] vhost_worker+0xf7/0x157 [vhost] [] kthread+0xfd/0x105 [] ? vhost_dev_set_owner+0x22e/0x22e [vhost] [] ? flush_kthread_worker+0xf3/0xf3 [] ret_from_fork+0x4e/0x80 [] ? flush_kthread_worker+0xf3/0xf3 Work around by doing kvmalloc instead. Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko") Signed-off-by: Junichi Uekawa Reviewed-by: Stefano Garzarella Acked-by: Michael S. 
Tsirkin Link: https://lore.kernel.org/r/20220928064538.667678-1-uekawa@chromium.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/vhost/vsock.c | 2 +- net/vmw_vsock/virtio_transport_common.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 5d2d6ce7ff41..b0153617fe0e 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -359,7 +359,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, return NULL; } - pkt->buf = kmalloc(pkt->len, GFP_KERNEL); + pkt->buf = kvmalloc(pkt->len, GFP_KERNEL); if (!pkt->buf) { kfree(pkt); return NULL; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index d6d3a05c008a..c9ee9259af48 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1196,7 +1196,7 @@ EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) { - kfree(pkt->buf); + kvfree(pkt->buf); kfree(pkt); } EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); From 2a1d0363208528a3bacbc2c37264d60182efd482 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 28 Sep 2022 21:39:38 +0800 Subject: [PATCH 0131/1477] mISDN: fix use-after-free bugs in l1oip timer handlers [ Upstream commit 2568a7e0832ee30b0a351016d03062ab4e0e0a3f ] The l1oip_cleanup() traverses the l1oip_ilist and calls release_card() to cleanup module and stack. However, release_card() calls del_timer() to delete the timers such as keep_tl and timeout_tl. If the timer handler is running, the del_timer() will not stop it and result in UAF bugs. One of the processes is shown below: (cleanup routine) | (timer handler) release_card() | l1oip_timeout() ... | del_timer() | ... ... 
| kfree(hc) //FREE | | hc->timeout_on = 0 //USE Fix by calling del_timer_sync() in release_card(), which makes sure the timer handlers have finished before the resources, such as l1oip and so on, have been deallocated. What's more, the hc->workq and hc->socket_thread can kick those timers right back in. We add a bool flag to show if card is released. Then, check this flag in hc->workq and hc->socket_thread. Fixes: 3712b42d4b1b ("Add layer1 over IP support") Signed-off-by: Duoming Zhou Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/isdn/mISDN/l1oip.h | 1 + drivers/isdn/mISDN/l1oip_core.c | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/isdn/mISDN/l1oip.h b/drivers/isdn/mISDN/l1oip.h index 7ea10db20e3a..48133d022812 100644 --- a/drivers/isdn/mISDN/l1oip.h +++ b/drivers/isdn/mISDN/l1oip.h @@ -59,6 +59,7 @@ struct l1oip { int bundle; /* bundle channels in one frm */ int codec; /* codec to use for transmis. 
*/ int limit; /* limit number of bchannels */ + bool shutdown; /* if card is released */ /* timer */ struct timer_list keep_tl; diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index b57dcb834594..aec4f2a69c3b 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -275,7 +275,7 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask, p = frame; /* restart timer */ - if (time_before(hc->keep_tl.expires, jiffies + 5 * HZ)) + if (time_before(hc->keep_tl.expires, jiffies + 5 * HZ) && !hc->shutdown) mod_timer(&hc->keep_tl, jiffies + L1OIP_KEEPALIVE * HZ); else hc->keep_tl.expires = jiffies + L1OIP_KEEPALIVE * HZ; @@ -601,7 +601,9 @@ multiframe: goto multiframe; /* restart timer */ - if (time_before(hc->timeout_tl.expires, jiffies + 5 * HZ) || !hc->timeout_on) { + if ((time_before(hc->timeout_tl.expires, jiffies + 5 * HZ) || + !hc->timeout_on) && + !hc->shutdown) { hc->timeout_on = 1; mod_timer(&hc->timeout_tl, jiffies + L1OIP_TIMEOUT * HZ); } else /* only adjust timer */ @@ -1232,11 +1234,10 @@ release_card(struct l1oip *hc) { int ch; - if (timer_pending(&hc->keep_tl)) - del_timer(&hc->keep_tl); + hc->shutdown = true; - if (timer_pending(&hc->timeout_tl)) - del_timer(&hc->timeout_tl); + del_timer_sync(&hc->keep_tl); + del_timer_sync(&hc->timeout_tl); cancel_work_sync(&hc->workq); From f65955340e0044f5c41ac799a01698ac7dee8a4e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 28 Sep 2022 14:10:13 -0400 Subject: [PATCH 0132/1477] sctp: handle the error returned from sctp_auth_asoc_init_active_key [ Upstream commit 022152aaebe116a25c39818a07e175a8cd3c1e11 ] When it returns an error from sctp_auth_asoc_init_active_key(), the active_key is actually not updated. The old sh_key will be freed while it's still used as active key in asoc.
Then a use-after-free will be triggered when sending packets, as found by syzbot: sctp_auth_shkey_hold+0x22/0xa0 net/sctp/auth.c:112 sctp_set_owner_w net/sctp/socket.c:132 [inline] sctp_sendmsg_to_asoc+0xbd5/0x1a20 net/sctp/socket.c:1863 sctp_sendmsg+0x1053/0x1d50 net/sctp/socket.c:2025 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 This patch is to fix it by not replacing the sh_key when it returns errors from sctp_auth_asoc_init_active_key() in sctp_auth_set_key(). For sctp_auth_set_active_key(), old active_key_id will be set back to asoc->active_key_id when the same thing happens. Fixes: 58acd1009226 ("sctp: update active_key for asoc when old key is being replaced") Reported-by: syzbot+a236dd8e9622ed8954a3@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sctp/auth.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index db6b7373d16c..34964145514e 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -863,12 +863,17 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, } list_del_init(&shkey->key_list); - sctp_auth_shkey_release(shkey); list_add(&cur_key->key_list, sh_keys); - if (asoc && asoc->active_key_id == auth_key->sca_keynumber) - sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + if (asoc && asoc->active_key_id == auth_key->sca_keynumber && + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) { + list_del_init(&cur_key->key_list); + sctp_auth_shkey_release(cur_key); + list_add(&shkey->key_list, sh_keys); + return -ENOMEM; + } + sctp_auth_shkey_release(shkey); return 0; } @@ -902,8 +907,13 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep, return -EINVAL; if (asoc) { + __u16 active_key_id = asoc->active_key_id; + asoc->active_key_id = key_id; - sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + if
(sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) { + asoc->active_key_id = active_key_id; + return -ENOMEM; + } } else ep->active_key_id = key_id; From 96a3ddb870313286935a2a7184c08777369089c2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 28 Sep 2022 16:03:31 -0400 Subject: [PATCH 0133/1477] tcp: fix tcp_cwnd_validate() to not forget is_cwnd_limited [ Upstream commit f4ce91ce12a7c6ead19b128ffa8cff6e3ded2a14 ] This commit fixes a bug in the tracking of max_packets_out and is_cwnd_limited. This bug can cause the connection to fail to remember that is_cwnd_limited is true, causing the connection to fail to grow cwnd when it should, causing throughput to be lower than it should be. The following event sequence is an example that triggers the bug: (a) The connection is cwnd_limited, but packets_out is not at its peak due to TSO deferral deciding not to send another skb yet. In such cases the connection can advance max_packets_seq and set tp->is_cwnd_limited to true and max_packets_out to a small number. (b) Then later in the round trip the connection is pacing-limited (not cwnd-limited), and packets_out is larger. In such cases the connection would raise max_packets_out to a bigger number but (unexpectedly) flip tp->is_cwnd_limited from true to false. This commit fixes that bug. One straightforward fix would be to separately track (a) the next window after max_packets_out reaches a maximum, and (b) the next window after tp->is_cwnd_limited is set to true. But this would require consuming an extra u32 sequence number. Instead, to save space we track only the most important information. Specifically, we track the strongest available signal of the degree to which the cwnd is fully utilized: (1) If the connection is cwnd-limited then we remember that fact for the current window. (2) If the connection is not cwnd-limited then we track the maximum number of outstanding packets in the current window.
In particular, note that the new logic cannot trigger the buggy (a)/(b) sequence above because with the new logic a condition where tp->packets_out > tp->max_packets_out can only trigger an update of tp->is_cwnd_limited if tp->is_cwnd_limited is false. This first showed up in a testing of a BBRv2 dev branch, but this buggy behavior highlighted a general issue with the tcp_cwnd_validate() logic that can cause cwnd to fail to increase at the proper rate for any TCP congestion control, including Reno or CUBIC. Fixes: ca8a22634381 ("tcp: make cwnd-limited checks measurement-based, and gentler") Signed-off-by: Neal Cardwell Signed-off-by: Kevin(Yudong) Yang Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/tcp.h | 2 +- include/net/tcp.h | 5 ++++- net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_output.c | 19 ++++++++++++------- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2f87377e9af7..6e3340379d85 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -265,7 +265,7 @@ struct tcp_sock { u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ u32 max_packets_out; /* max packets_out in last window */ - u32 max_packets_seq; /* right edge of max_packets_out flight */ + u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ u16 urg_data; /* Saved octet of OOB data and control flags */ u8 ecn_flags; /* ECN status bits. */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 8129ce9a0771..bf4af27f5620 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1271,11 +1271,14 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + if (tp->is_cwnd_limited) + return true; + /* If in slow start, ensure cwnd grows to twice what was ACKed. 
*/ if (tcp_in_slow_start(tp)) return tp->snd_cwnd < 2 * tp->max_packets_out; - return tp->is_cwnd_limited; + return false; } /* BBR congestion control needs pacing. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bfeb05f62b94..24328ad00278 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2796,6 +2796,8 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd = TCP_INIT_CWND; tp->snd_cwnd_cnt = 0; + tp->is_cwnd_limited = 0; + tp->max_packets_out = 0; tp->window_clamp = 0; tp->delivered = 0; tp->delivered_ce = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 48fce999dc61..eefd032bc6db 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1876,15 +1876,20 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; struct tcp_sock *tp = tcp_sk(sk); - /* Track the maximum number of outstanding packets in each - * window, and remember whether we were cwnd-limited then. + /* Track the strongest available signal of the degree to which the cwnd + * is fully utilized. If cwnd-limited then remember that fact for the + * current window. If not cwnd-limited then track the maximum number of + * outstanding packets in the current window. (If cwnd-limited then we + * chose to not update tp->max_packets_out to avoid an extra else + * clause with no functional impact.) 
*/ - if (!before(tp->snd_una, tp->max_packets_seq) || - tp->packets_out > tp->max_packets_out || - is_cwnd_limited) { - tp->max_packets_out = tp->packets_out; - tp->max_packets_seq = tp->snd_nxt; + if (!before(tp->snd_una, tp->cwnd_usage_seq) || + is_cwnd_limited || + (!tp->is_cwnd_limited && + tp->packets_out > tp->max_packets_out)) { tp->is_cwnd_limited = is_cwnd_limited; + tp->max_packets_out = tp->packets_out; + tp->cwnd_usage_seq = tp->snd_nxt; } if (tcp_is_cwnd_limited(sk)) { From d9e25dc053f6114d3fb5dca0e52409778e28acbc Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 30 Sep 2022 13:34:08 +0200 Subject: [PATCH 0134/1477] spi: Ensure that sg_table won't be used after being freed [ Upstream commit 8e9204cddcc3fea9affcfa411715ba4f66e97587 ] SPI code checks for non-zero sgt->orig_nents to determine if the buffer has been DMA-mapped. Ensure that sg_table is really zeroed after free to avoid potential NULL pointer dereference if the given SPI xfer object is reused again without being DMA-mapped. 
Fixes: 0c17ba73c08f ("spi: Fix cache corruption due to DMA/PIO overlap") Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20220930113408.19720-1-m.szyprowski@samsung.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 6ea7b286c80c..857a1399850c 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -946,6 +946,8 @@ void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev, if (sgt->orig_nents) { dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir); sg_free_table(sgt); + sgt->orig_nents = 0; + sgt->nents = 0; } } From da349221c4d2d4ac5f606c1c3b36d4ef0b3e6a0c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 29 Sep 2022 00:25:37 +0900 Subject: [PATCH 0135/1477] net: rds: don't hold sock lock when cancelling work from rds_tcp_reset_callbacks() [ Upstream commit a91b750fd6629354460282bbf5146c01b05c4859 ] syzbot is reporting lockdep warning at rds_tcp_reset_callbacks() [1], for commit ac3615e7f3cffe2a ("RDS: TCP: Reduce code duplication in rds_tcp_reset_callbacks()") added cancel_delayed_work_sync() into a section protected by lock_sock() without realizing that rds_send_xmit() might call lock_sock(). We don't need to protect cancel_delayed_work_sync() using lock_sock(), for even if rds_{send,recv}_worker() re-queued this work while __flush_work() from cancel_delayed_work_sync() was waiting for this work to complete, retried rds_{send,recv}_worker() is no-op due to the absence of RDS_CONN_UP bit. Link: https://syzkaller.appspot.com/bug?extid=78c55c7bc6f66e53dce2 [1] Reported-by: syzbot Co-developed-by: Hillf Danton Signed-off-by: Hillf Danton Signed-off-by: Tetsuo Handa Tested-by: syzbot Fixes: ac3615e7f3cffe2a ("RDS: TCP: Reduce code duplication in rds_tcp_reset_callbacks()") Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/rds/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 5327d130c4b5..b560d06e6d96 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -166,10 +166,10 @@ void rds_tcp_reset_callbacks(struct socket *sock, */ atomic_set(&cp->cp_state, RDS_CONN_RESETTING); wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags)); - lock_sock(osock->sk); /* reset receive side state for rds_tcp_data_recv() for osock */ cancel_delayed_work_sync(&cp->cp_send_w); cancel_delayed_work_sync(&cp->cp_recv_w); + lock_sock(osock->sk); if (tc->t_tinc) { rds_inc_put(&tc->t_tinc->ti_inc); tc->t_tinc = NULL; From 6cc0e2afc6a137d45b9523f61a1b1b16a68c9dc0 Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Fri, 30 Sep 2022 14:28:43 +0800 Subject: [PATCH 0136/1477] bnx2x: fix potential memory leak in bnx2x_tpa_stop() [ Upstream commit b43f9acbb8942b05252be83ac25a81cec70cc192 ] bnx2x_tpa_stop() allocates a memory chunk from new_data with bnx2x_frag_alloc(). The new_data should be freed when gets some error. But when "pad + len > fp->rx_buf_size" is true, bnx2x_tpa_stop() returns without releasing the new_data, which will lead to a memory leak. We should free the new_data with bnx2x_frag_free() when "pad + len > fp->rx_buf_size" is true. Fixes: 07b0f00964def8af9321cfd6c4a7e84f6362f728 ("bnx2x: fix possible panic under memory stress") Signed-off-by: Jianglei Nie Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 198e041d8410..4f669e7c7558 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -788,6 +788,7 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp, BNX2X_ERR("skb_put is about to fail... 
pad %d len %d rx_buf_size %d\n", pad, len, fp->rx_buf_size); bnx2x_panic(); + bnx2x_frag_free(fp, new_data); return; } #endif From 67cb80a9d2c83edac0e42aaa91ed4dd527cec284 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 2 Oct 2022 01:43:44 +0900 Subject: [PATCH 0137/1477] net/ieee802154: reject zero-sized raw_sendmsg() [ Upstream commit 3a4d061c699bd3eedc80dc97a4b2a2e1af83c6f5 ] syzbot is hitting skb_assert_len() warning at raw_sendmsg() for ieee802154 socket. What commit dc633700f00f726e ("net/af_packet: check len when min_header_len equals to 0") does also applies to ieee802154 socket. Link: https://syzkaller.appspot.com/bug?extid=5ea725c25d06fb9114c4 Reported-by: syzbot Fixes: fd1894224407c484 ("bpf: Don't redirect packets with invalid pkt_len") Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ieee802154/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 7edec210780a..d4c162d63634 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -252,6 +252,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) return -EOPNOTSUPP; } + if (!size) + return -EINVAL; + lock_sock(sk); if (!sk->sk_bound_dev_if) dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); From 6cb54f21623d55024b1264184f6e7bf6d613dcde Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 1 Oct 2022 13:51:02 -0700 Subject: [PATCH 0138/1477] once: add DO_ONCE_SLOW() for sleepable contexts [ Upstream commit 62c07983bef9d3e78e71189441e1a470f0d1e653 ] Christophe Leroy reported a ~80ms latency spike happening at first TCP connect() time. This is because __inet_hash_connect() uses get_random_once() to populate a perturbation table which became quite big after commit 4c2c8f03a5ab ("tcp: increase source port perturb table to 2^16") get_random_once() uses DO_ONCE(), which blocks hard irqs for the duration of the operation.
This patch adds DO_ONCE_SLOW() which uses a mutex instead of a spinlock for operations where we prefer to stay in process context. Then __inet_hash_connect() can use get_random_slow_once() to populate its perturbation table. Fixes: 4c2c8f03a5ab ("tcp: increase source port perturb table to 2^16") Fixes: 190cc82489f4 ("tcp: change source port randomizarion at connect() time") Reported-by: Christophe Leroy Link: https://lore.kernel.org/netdev/CANn89iLAEYBaoYajy0Y9UmGFff5GPxDUoG-ErVB2jDdRNQ5Tug@mail.gmail.com/T/#t Signed-off-by: Eric Dumazet Cc: Willy Tarreau Tested-by: Christophe Leroy Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/once.h | 28 ++++++++++++++++++++++++++++ lib/once.c | 30 ++++++++++++++++++++++++++++++ net/ipv4/inet_hashtables.c | 4 ++-- 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/include/linux/once.h b/include/linux/once.h index ae6f4eb41cbe..bb58e1c3aa03 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -5,10 +5,18 @@ #include #include +/* Helpers used from arbitrary contexts. + * Hard irqs are blocked, be cautious. + */ bool __do_once_start(bool *done, unsigned long *flags); void __do_once_done(bool *done, struct static_key_true *once_key, unsigned long *flags, struct module *mod); +/* Variant for process contexts only. */ +bool __do_once_slow_start(bool *done); +void __do_once_slow_done(bool *done, struct static_key_true *once_key, + struct module *mod); + /* Call a function exactly once. The idea of DO_ONCE() is to perform * a function call such as initialization of random seeds, etc, only * once, where DO_ONCE() can live in the fast-path. After @func has @@ -52,9 +60,29 @@ void __do_once_done(bool *done, struct static_key_true *once_key, ___ret; \ }) +/* Variant of DO_ONCE() for process/sleepable contexts. */ +#define DO_ONCE_SLOW(func, ...) 
\ + ({ \ + bool ___ret = false; \ + static bool __section(".data.once") ___done = false; \ + static DEFINE_STATIC_KEY_TRUE(___once_key); \ + if (static_branch_unlikely(&___once_key)) { \ + ___ret = __do_once_slow_start(&___done); \ + if (unlikely(___ret)) { \ + func(__VA_ARGS__); \ + __do_once_slow_done(&___done, &___once_key, \ + THIS_MODULE); \ + } \ + } \ + ___ret; \ + }) + #define get_random_once(buf, nbytes) \ DO_ONCE(get_random_bytes, (buf), (nbytes)) #define get_random_once_wait(buf, nbytes) \ DO_ONCE(get_random_bytes_wait, (buf), (nbytes)) \ +#define get_random_slow_once(buf, nbytes) \ + DO_ONCE_SLOW(get_random_bytes, (buf), (nbytes)) + #endif /* _LINUX_ONCE_H */ diff --git a/lib/once.c b/lib/once.c index 59149bf3bfb4..351f66aad310 100644 --- a/lib/once.c +++ b/lib/once.c @@ -66,3 +66,33 @@ void __do_once_done(bool *done, struct static_key_true *once_key, once_disable_jump(once_key, mod); } EXPORT_SYMBOL(__do_once_done); + +static DEFINE_MUTEX(once_mutex); + +bool __do_once_slow_start(bool *done) + __acquires(once_mutex) +{ + mutex_lock(&once_mutex); + if (*done) { + mutex_unlock(&once_mutex); + /* Keep sparse happy by restoring an even lock count on + * this mutex. In case we return here, we don't call into + * __do_once_done but return early in the DO_ONCE_SLOW() macro. 
+ */ + __acquire(once_mutex); + return false; + } + + return true; +} +EXPORT_SYMBOL(__do_once_slow_start); + +void __do_once_slow_done(bool *done, struct static_key_true *once_key, + struct module *mod) + __releases(once_mutex) +{ + *done = true; + mutex_unlock(&once_mutex); + once_disable_jump(once_key, mod); +} +EXPORT_SYMBOL(__do_once_slow_done); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index feb7f072f2b2..c0de655fffd7 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -771,8 +771,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (likely(remaining > 1)) remaining &= ~1U; - net_get_random_once(table_perturb, - INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); + get_random_slow_once(table_perturb, + INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); index = port_offset & (INET_TABLE_PERTURB_SIZE - 1); offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); From 29f50bcf0f8b9e49c3c9b0e08fcae2ec3a88cc9f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Oct 2022 17:19:27 +0100 Subject: [PATCH 0139/1477] net: mvpp2: fix mvpp2 debugfs leak [ Upstream commit 0152dfee235e87660f52a117fc9f70dc55956bb4 ] When mvpp2 is unloaded, the driver specific debugfs directory is not removed, which technically leads to a memory leak. However, this directory is only created when the first device is probed, so the hardware is present. Removing the module is only something a developer would do when e.g. testing out changes, so the module would be reloaded. So this memory leak is minor. The original attempt in commit fe2c9c61f668 ("net: mvpp2: debugfs: fix memory leak when using debugfs_lookup()") that was labelled as a memory leak fix was not, it fixed a refcount leak, but in doing so created a problem when the module is reloaded - the directory already exists, but mvpp2_root is NULL, so we lose all debugfs entries. This fix has been reverted.
This is the alternative fix, where we remove the offending directory whenever the driver is unloaded. Fixes: 21da57a23125 ("net: mvpp2: add a debugfs interface for the Header Parser") Signed-off-by: Russell King (Oracle) Reviewed-by: Greg Kroah-Hartman Reviewed-by: Marcin Wojtas Link: https://lore.kernel.org/r/E1ofOAB-00CzkG-UO@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 1 + drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c | 10 ++++++++-- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 13 ++++++++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index d825eb021b22..e999ac2de34e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -1434,6 +1434,7 @@ u32 mvpp2_read(struct mvpp2 *priv, u32 offset); void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name); void mvpp2_dbgfs_cleanup(struct mvpp2 *priv); +void mvpp2_dbgfs_exit(void); #ifdef CONFIG_MVPP2_PTP int mvpp22_tai_probe(struct device *dev, struct mvpp2 *priv); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c index 4a3baa7e0142..75e83ea2a926 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c @@ -691,6 +691,13 @@ static int mvpp2_dbgfs_port_init(struct dentry *parent, return 0; } +static struct dentry *mvpp2_root; + +void mvpp2_dbgfs_exit(void) +{ + debugfs_remove(mvpp2_root); +} + void mvpp2_dbgfs_cleanup(struct mvpp2 *priv) { debugfs_remove_recursive(priv->dbgfs_dir); @@ -700,10 +707,9 @@ void mvpp2_dbgfs_cleanup(struct mvpp2 *priv) void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name) { - struct dentry *mvpp2_dir, *mvpp2_root; + struct dentry *mvpp2_dir; int ret, i; - mvpp2_root = debugfs_lookup(MVPP2_DRIVER_NAME, NULL); 
if (!mvpp2_root) mvpp2_root = debugfs_create_dir(MVPP2_DRIVER_NAME, NULL); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 542cd6f2c9bd..68c5ed8716c8 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7155,7 +7155,18 @@ static struct platform_driver mvpp2_driver = { }, }; -module_platform_driver(mvpp2_driver); +static int __init mvpp2_driver_init(void) +{ + return platform_driver_register(&mvpp2_driver); +} +module_init(mvpp2_driver_init); + +static void __exit mvpp2_driver_exit(void) +{ + platform_driver_unregister(&mvpp2_driver); + mvpp2_dbgfs_exit(); +} +module_exit(mvpp2_driver_exit); MODULE_DESCRIPTION("Marvell PPv2 Ethernet Driver - www.marvell.com"); MODULE_AUTHOR("Marcin Wojtas "); From 7839f2b3495be9d3dbc9e4a6a166ae8d6d15be68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Sun, 12 Jun 2022 16:48:53 +0200 Subject: [PATCH 0140/1477] drm: bridge: adv7511: fix CEC power down control register offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1d22b6033ea113a4c3850dfa2c0770885c81aec8 ] The ADV7511_REG_CEC_CTRL = 0xE2 register is part of the main register map - not the CEC register map. As such, we shouldn't apply an offset to the register address. Doing so will cause us to address a bogus register for chips with a CEC register map offset (e.g. ADV7533). 
Fixes: 3b1b975003e4 ("drm: adv7511/33: add HDMI CEC support") Signed-off-by: Alvin Šipraga Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20220612144854.2223873-2-alvin@pqrs.dk Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/adv7511/adv7511.h | 5 +---- drivers/gpu/drm/bridge/adv7511/adv7511_cec.c | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index a0f6ee15c248..711061bf3eb7 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -386,10 +386,7 @@ void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1); #else static inline int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511) { - unsigned int offset = adv7511->type == ADV7533 ? - ADV7533_REG_CEC_OFFSET : 0; - - regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, + regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL, ADV7511_CEC_CTRL_POWER_DOWN); return 0; } diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c index a20a45c0b353..ddd1305b82b2 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c @@ -316,7 +316,7 @@ int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511) goto err_cec_alloc; } - regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, 0); + regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL, 0); /* cec soft reset */ regmap_write(adv7511->regmap_cec, ADV7511_REG_CEC_SOFT_RESET + offset, 0x01); @@ -343,7 +343,7 @@ err_cec_alloc: dev_info(dev, "Initializing CEC failed with error %d, disabling CEC\n", ret); err_cec_parse_dt: - regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, + regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL, ADV7511_CEC_CTRL_POWER_DOWN); return ret == -EPROBE_DEFER ? 
ret : 0; } From 050b6505074180e8404be9dc0d182ebeaea75cc1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Jul 2022 13:55:40 +0300 Subject: [PATCH 0141/1477] drm/bridge: Avoid uninitialized variable warning [ Upstream commit 7d1202738efda60155d98b370b3c70d336be0eea ] This code works, but technically it uses "num_in_bus_fmts" before it has been initialized so it leads to static checker warnings and probably KMEMsan warnings at run time. Initialize the variable to zero to silence the warning. Fixes: f32df58acc68 ("drm/bridge: Add the necessary bits to support bus format negotiation") Signed-off-by: Dan Carpenter Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/YrrIs3hoGcPVmXc5@kili Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_bridge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index 044acd07c153..d799ec14fd7f 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -753,8 +753,8 @@ static int select_bus_fmt_recursive(struct drm_bridge *first_bridge, struct drm_connector_state *conn_state, u32 out_bus_fmt) { + unsigned int i, num_in_bus_fmts = 0; struct drm_bridge_state *cur_state; - unsigned int num_in_bus_fmts, i; struct drm_bridge *prev_bridge; u32 *in_bus_fmts; int ret; @@ -875,7 +875,7 @@ drm_atomic_bridge_chain_select_bus_fmts(struct drm_bridge *bridge, struct drm_connector *conn = conn_state->connector; struct drm_encoder *encoder = bridge->encoder; struct drm_bridge_state *last_bridge_state; - unsigned int i, num_out_bus_fmts; + unsigned int i, num_out_bus_fmts = 0; struct drm_bridge *last_bridge; u32 *out_bus_fmts; int ret = 0; From 45120fa5e522d444e3fc1c5a9afc5d53eed91d00 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 11 Jul 2022 19:38:31 +0200 Subject: [PATCH 0142/1477] drm/mipi-dsi: Detach devices when removing the host [ Upstream commit 668a8f17b5290d04ef7343636a5588a0692731a1 ] Whenever the 
MIPI-DSI host is unregistered, the code of mipi_dsi_host_unregister() loops over every device currently found on that bus and will unregister it. However, it doesn't detach it from the bus first, which leads to all kind of resource leaks if the host wants to perform some clean up whenever a device is detached. Fixes: 068a00233969 ("drm: Add MIPI DSI bus support") Acked-by: Thomas Zimmermann Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220711173939.1132294-2-maxime@cerno.tech Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_mipi_dsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 5dd475e82995..2c43d54766f3 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -300,6 +300,7 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv) { struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev); + mipi_dsi_detach(dsi); mipi_dsi_device_unregister(dsi); return 0; From 3f5889fd65004e84f3b9806bff1ddd0b90aede2d Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 21 Jul 2022 17:22:58 +0800 Subject: [PATCH 0143/1477] drm/bridge: parade-ps8640: Fix regulator supply order [ Upstream commit fc94224c2e0ae8d83ac511a3ef4962178505469d ] The datasheet says that VDD12 must be enabled and at full voltage before VDD33 is enabled. Reorder the bulk regulator supply names so that VDD12 is enabled before VDD33. Any enable ramp delays should be handled by setting proper constraints on the regulators. 
Fixes: bc1aee7fc8f0 ("drm/bridge: Add I2C based driver for ps8640 bridge") Signed-off-by: Chen-Yu Tsai Reviewed-by: Neil Armstrong Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20220721092258.3397461-1-wenst@chromium.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/parade-ps8640.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 7bd0affa057a..924851010400 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -333,8 +333,8 @@ static int ps8640_probe(struct i2c_client *client) if (IS_ERR(ps_bridge->panel_bridge)) return PTR_ERR(ps_bridge->panel_bridge); - ps_bridge->supplies[0].supply = "vdd33"; - ps_bridge->supplies[1].supply = "vdd12"; + ps_bridge->supplies[0].supply = "vdd12"; + ps_bridge->supplies[1].supply = "vdd33"; ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ps_bridge->supplies), ps_bridge->supplies); if (ret) From ad06d6bed5f265ade68123dfd35400a025b29011 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 10 Feb 2022 15:40:25 +0000 Subject: [PATCH 0144/1477] drm/dp_mst: fix drm_dp_dpcd_read return value checks [ Upstream commit 2ac6cdd581f48c8f68747156fde5868486a44985 ] drm_dp_dpcd_read returns the number of bytes read. The previous code would print garbage on DPCD error, and would exit with an error on success.
Signed-off-by: Simon Ser Fixes: cb897542c6d2 ("drm/dp_mst: Fix W=1 warnings") Cc: Lyude Paul Cc: Benjamin Gaignard Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/473500/ Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index ab423b0413ee..4272cd3622f8 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -4856,14 +4856,14 @@ void drm_dp_mst_dump_topology(struct seq_file *m, seq_printf(m, "dpcd: %*ph\n", DP_RECEIVER_CAP_SIZE, buf); ret = drm_dp_dpcd_read(mgr->aux, DP_FAUX_CAP, buf, 2); - if (ret) { + if (ret != 2) { seq_printf(m, "faux/mst read failed\n"); goto out; } seq_printf(m, "faux/mst: %*ph\n", 2, buf); ret = drm_dp_dpcd_read(mgr->aux, DP_MSTM_CTRL, buf, 1); - if (ret) { + if (ret != 1) { seq_printf(m, "mst ctrl read failed\n"); goto out; } @@ -4871,7 +4871,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m, /* dump the standard OUI branch header */ ret = drm_dp_dpcd_read(mgr->aux, DP_BRANCH_OUI, buf, DP_BRANCH_OUI_HEADER_SIZE); - if (ret) { + if (ret != DP_BRANCH_OUI_HEADER_SIZE) { seq_printf(m, "branch oui read failed\n"); goto out; } From 5e25bfcd12d8e90324e27b1adc1fc7410d3664ee Mon Sep 17 00:00:00 2001 From: Liang He Date: Mon, 11 Jul 2022 21:15:50 +0800 Subject: [PATCH 0145/1477] drm:pl111: Add of_node_put() when breaking out of for_each_available_child_of_node() [ Upstream commit e0686dc6f2252e009c455fe99e2ce9d62a60eb47 ] The reference 'child' in the iteration of for_each_available_child_of_node() is only escaped out into a local variable which is only used to check its value. So we still need to call of_node_put() when breaking out of the for_each_available_child_of_node() loop, which automatically increases and decreases the refcount.
Fixes: ca454bd42dc2 ("drm/pl111: Support the Versatile Express") Signed-off-by: Liang He Reviewed-by: Rob Herring Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220711131550.361350-1-windhl@126.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/pl111/pl111_versatile.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/pl111/pl111_versatile.c b/drivers/gpu/drm/pl111/pl111_versatile.c index bdd883f4f0da..963a5d5e6987 100644 --- a/drivers/gpu/drm/pl111/pl111_versatile.c +++ b/drivers/gpu/drm/pl111/pl111_versatile.c @@ -402,6 +402,7 @@ static int pl111_vexpress_clcd_init(struct device *dev, struct device_node *np, if (of_device_is_compatible(child, "arm,pl111")) { has_coretile_clcd = true; ct_clcd = child; + of_node_put(child); break; } if (of_device_is_compatible(child, "arm,hdlcd")) { From 84da5cdf43d24768c53984fa05cea60c195b030f Mon Sep 17 00:00:00 2001 From: Rustam Subkhankulov Date: Sun, 14 Aug 2022 01:08:43 +0300 Subject: [PATCH 0146/1477] platform/chrome: fix double-free in chromeos_laptop_prepare() [ Upstream commit 6ad4194d6a1e1d11b285989cd648ef695b4a93c0 ] If chromeos_laptop_prepare_i2c_peripherals() fails after allocating memory for 'cros_laptop->i2c_peripherals', this memory is freed at 'err_out' label and nonzero value is returned. Then chromeos_laptop_destroy() is called, resulting in double-free error. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Signed-off-by: Rustam Subkhankulov Fixes: 5020cd29d8bf ("platform/chrome: chromeos_laptop - supply properties for ACPI devices") Reviewed-by: Dmitry Torokhov Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20220813220843.2373004-1-subkhankulov@ispras.ru Signed-off-by: Sasha Levin --- drivers/platform/chrome/chromeos_laptop.c | 24 ++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c index 472a03daa869..109c191d35cf 100644 --- a/drivers/platform/chrome/chromeos_laptop.c +++ b/drivers/platform/chrome/chromeos_laptop.c @@ -718,6 +718,7 @@ static int __init chromeos_laptop_prepare_i2c_peripherals(struct chromeos_laptop *cros_laptop, const struct chromeos_laptop *src) { + struct i2c_peripheral *i2c_peripherals; struct i2c_peripheral *i2c_dev; struct i2c_board_info *info; int i; @@ -726,17 +727,15 @@ chromeos_laptop_prepare_i2c_peripherals(struct chromeos_laptop *cros_laptop, if (!src->num_i2c_peripherals) return 0; - cros_laptop->i2c_peripherals = kmemdup(src->i2c_peripherals, - src->num_i2c_peripherals * - sizeof(*src->i2c_peripherals), - GFP_KERNEL); - if (!cros_laptop->i2c_peripherals) + i2c_peripherals = kmemdup(src->i2c_peripherals, + src->num_i2c_peripherals * + sizeof(*src->i2c_peripherals), + GFP_KERNEL); + if (!i2c_peripherals) return -ENOMEM; - cros_laptop->num_i2c_peripherals = src->num_i2c_peripherals; - - for (i = 0; i < cros_laptop->num_i2c_peripherals; i++) { - i2c_dev = &cros_laptop->i2c_peripherals[i]; + for (i = 0; i < src->num_i2c_peripherals; i++) { + i2c_dev = &i2c_peripherals[i]; info = &i2c_dev->board_info; error = chromeos_laptop_setup_irq(i2c_dev); @@ -754,16 +753,19 @@ chromeos_laptop_prepare_i2c_peripherals(struct chromeos_laptop *cros_laptop, } } + cros_laptop->i2c_peripherals = i2c_peripherals; + cros_laptop->num_i2c_peripherals = src->num_i2c_peripherals; + return 0; err_out: while (--i >= 0) { - i2c_dev = 
&cros_laptop->i2c_peripherals[i]; + i2c_dev = &i2c_peripherals[i]; info = &i2c_dev->board_info; if (info->properties) property_entries_free(info->properties); } - kfree(cros_laptop->i2c_peripherals); + kfree(i2c_peripherals); return error; } From 868fc93b615b9f6c2b0b1894536618fa6cd66acc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 19 Aug 2022 08:20:36 +0300 Subject: [PATCH 0147/1477] platform/chrome: fix memory corruption in ioctl [ Upstream commit 8a07b45fd3c2dda24fad43639be5335a4595196a ] If "s_mem.bytes" is larger than the buffer size it leads to memory corruption. Fixes: eda2e30c6684 ("mfd / platform: cros_ec: Miscellaneous character device to talk with the EC") Signed-off-by: Dan Carpenter Reviewed-by: Guenter Roeck Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/Yv8dpCFZJdbUT5ye@kili Signed-off-by: Sasha Levin --- drivers/platform/chrome/cros_ec_chardev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/chrome/cros_ec_chardev.c b/drivers/platform/chrome/cros_ec_chardev.c index fd33de546aee..0de7c255254e 100644 --- a/drivers/platform/chrome/cros_ec_chardev.c +++ b/drivers/platform/chrome/cros_ec_chardev.c @@ -327,6 +327,9 @@ static long cros_ec_chardev_ioctl_readmem(struct cros_ec_dev *ec, if (copy_from_user(&s_mem, arg, sizeof(s_mem))) return -EFAULT; + if (s_mem.bytes > sizeof(s_mem.buffer)) + return -EINVAL; + num = ec_dev->cmd_readmem(ec_dev, s_mem.offset, s_mem.bytes, s_mem.buffer); if (num <= 0) From c2c6022e100426e8fc9276e9ebb203687ece5807 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 25 Aug 2022 16:02:37 +0200 Subject: [PATCH 0148/1477] ASoC: tas2764: Allow mono streams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 23204d928a27146d13e11c9383632775345ecca8 ] The part is a mono speaker amp, but it can do downmix and switch between left and right channel, so the right channel range is 1 to 2. 
(This mirrors commit bf54d97a835d ("ASoC: tas2770: Allow mono streams") which was a fix to the tas2770 driver.) Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220825140241.53963-2-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 37588804a6b5..bde92f080459 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -485,7 +485,7 @@ static struct snd_soc_dai_driver tas2764_dai_driver[] = { .id = 0, .playback = { .stream_name = "ASI1 Playback", - .channels_min = 2, + .channels_min = 1, .channels_max = 2, .rates = TAS2764_RATES, .formats = TAS2764_FORMATS, From 2e6b64df54cde7b39ccb161bf5e2485a1a9a4a35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 25 Aug 2022 16:02:38 +0200 Subject: [PATCH 0149/1477] ASoC: tas2764: Drop conflicting set_bias_level power setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 09273f38832406db19a8907a934687cc10660a6b ] The driver is setting the PWR_CTRL field in both the set_bias_level callback and on DAPM events of the DAC widget (and also in the mute_stream method). Drop the set_bias_level callback altogether as the power setting it does is in conflict with the other code paths. (This mirrors commit c8a6ae3fe1c8 ("ASoC: tas2770: Drop conflicting set_bias_level power setting") which was a fix to the tas2770 driver.) 
Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220825140241.53963-3-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index bde92f080459..6b6e30b072f2 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -50,38 +50,6 @@ static void tas2764_reset(struct tas2764_priv *tas2764) usleep_range(1000, 2000); } -static int tas2764_set_bias_level(struct snd_soc_component *component, - enum snd_soc_bias_level level) -{ - struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); - - switch (level) { - case SND_SOC_BIAS_ON: - snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, - TAS2764_PWR_CTRL_MASK, - TAS2764_PWR_CTRL_ACTIVE); - break; - case SND_SOC_BIAS_STANDBY: - case SND_SOC_BIAS_PREPARE: - snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, - TAS2764_PWR_CTRL_MASK, - TAS2764_PWR_CTRL_MUTE); - break; - case SND_SOC_BIAS_OFF: - snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, - TAS2764_PWR_CTRL_MASK, - TAS2764_PWR_CTRL_SHUTDOWN); - break; - - default: - dev_err(tas2764->dev, - "wrong power level setting %d\n", level); - return -EINVAL; - } - - return 0; -} - #ifdef CONFIG_PM static int tas2764_codec_suspend(struct snd_soc_component *component) { @@ -549,7 +517,6 @@ static const struct snd_soc_component_driver soc_component_driver_tas2764 = { .probe = tas2764_codec_probe, .suspend = tas2764_codec_suspend, .resume = tas2764_codec_resume, - .set_bias_level = tas2764_set_bias_level, .controls = tas2764_snd_controls, .num_controls = ARRAY_SIZE(tas2764_snd_controls), .dapm_widgets = tas2764_dapm_widgets, From b77755f58ede5d6bfefc070d4276971b6d571c1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 25 Aug 2022 
16:02:39 +0200 Subject: [PATCH 0150/1477] ASoC: tas2764: Fix mute/unmute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f5ad67f13623548e5aff847f89700c178aaf2a98 ] Because the PWR_CTRL field is modeled as the power state of the DAC widget, and at the same time it is used to implement mute/unmute, we need some additional book-keeping to have the right end result no matter the sequence of calls. Without this fix, one permanently mutes an ongoing stream by toggling the associated speaker pin control. (This mirrors commit 1e5907bcb3a3 ("ASoC: tas2770: Fix handling of mute/unmute") which was a fix to the tas2770 driver.) Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220825140241.53963-4-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 57 +++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 6b6e30b072f2..8b262e7f5275 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -34,6 +34,9 @@ struct tas2764_priv { int v_sense_slot; int i_sense_slot; + + bool dac_powered; + bool unmuted; }; static void tas2764_reset(struct tas2764_priv *tas2764) @@ -50,6 +53,26 @@ static void tas2764_reset(struct tas2764_priv *tas2764) usleep_range(1000, 2000); } +static int tas2764_update_pwr_ctrl(struct tas2764_priv *tas2764) +{ + struct snd_soc_component *component = tas2764->component; + unsigned int val; + int ret; + + if (tas2764->dac_powered) + val = tas2764->unmuted ? 
+ TAS2764_PWR_CTRL_ACTIVE : TAS2764_PWR_CTRL_MUTE; + else + val = TAS2764_PWR_CTRL_SHUTDOWN; + + ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, + TAS2764_PWR_CTRL_MASK, val); + if (ret < 0) + return ret; + + return 0; +} + #ifdef CONFIG_PM static int tas2764_codec_suspend(struct snd_soc_component *component) { @@ -82,9 +105,7 @@ static int tas2764_codec_resume(struct snd_soc_component *component) usleep_range(1000, 2000); } - ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, - TAS2764_PWR_CTRL_MASK, - TAS2764_PWR_CTRL_ACTIVE); + ret = tas2764_update_pwr_ctrl(tas2764); if (ret < 0) return ret; @@ -118,14 +139,12 @@ static int tas2764_dac_event(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_POST_PMU: - ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, - TAS2764_PWR_CTRL_MASK, - TAS2764_PWR_CTRL_MUTE); + tas2764->dac_powered = true; + ret = tas2764_update_pwr_ctrl(tas2764); break; case SND_SOC_DAPM_PRE_PMD: - ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, - TAS2764_PWR_CTRL_MASK, - TAS2764_PWR_CTRL_SHUTDOWN); + tas2764->dac_powered = false; + ret = tas2764_update_pwr_ctrl(tas2764); break; default: dev_err(tas2764->dev, "Unsupported event\n"); @@ -170,17 +189,11 @@ static const struct snd_soc_dapm_route tas2764_audio_map[] = { static int tas2764_mute(struct snd_soc_dai *dai, int mute, int direction) { - struct snd_soc_component *component = dai->component; - int ret; + struct tas2764_priv *tas2764 = + snd_soc_component_get_drvdata(dai->component); - ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, - TAS2764_PWR_CTRL_MASK, - mute ? 
TAS2764_PWR_CTRL_MUTE : 0); - - if (ret < 0) - return ret; - - return 0; + tas2764->unmuted = !mute; + return tas2764_update_pwr_ctrl(tas2764); } static int tas2764_set_bitwidth(struct tas2764_priv *tas2764, int bitwidth) @@ -494,12 +507,6 @@ static int tas2764_codec_probe(struct snd_soc_component *component) if (ret < 0) return ret; - ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, - TAS2764_PWR_CTRL_MASK, - TAS2764_PWR_CTRL_MUTE); - if (ret < 0) - return ret; - return 0; } From c21c08fab716a8f8340bce66391cd99d65b14070 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 25 Aug 2022 16:13:34 +0200 Subject: [PATCH 0151/1477] platform/x86: msi-laptop: Fix old-ec check for backlight registering [ Upstream commit 83ac7a1c2ed5f17caa07cbbc84bad3c05dc3bf22 ] Commit 2cc6c717799f ("msi-laptop: Port to new backlight interface selection API") replaced this check: if (!quirks->old_ec_model || acpi_video_backlight_support()) pr_info("Brightness ignored, ..."); else do_register(); With: if (quirks->old_ec_model || acpi_video_get_backlight_type() == acpi_backlight_vendor) do_register(); But since the do_register() part was part of the else branch, the entire condition should be inverted. So not only the 2 statements on either side of the || should be inverted, but the || itself should be replaced with a &&. In practice this has likely not been an issue because the new-ec models (old_ec_model==false) likely all support ACPI video backlight control, making acpi_video_get_backlight_type() return acpi_backlight_video turning the second part of the || also false when old_ec_model == false. 
Fixes: 2cc6c717799f ("msi-laptop: Port to new backlight interface selection API") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220825141336.208597-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/msi-laptop.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 24ffc8e2d2d1..0960205ee49f 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -1048,8 +1048,7 @@ static int __init msi_init(void) return -EINVAL; /* Register backlight stuff */ - - if (quirks->old_ec_model || + if (quirks->old_ec_model && acpi_video_get_backlight_type() == acpi_backlight_vendor) { struct backlight_properties props; memset(&props, 0, sizeof(struct backlight_properties)); From 804d8e59f34f298cd198134a9e6887f3bbb863c0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 25 Aug 2022 16:13:36 +0200 Subject: [PATCH 0152/1477] platform/x86: msi-laptop: Fix resource cleanup [ Upstream commit 5523632aa10f906dfe2eb714ee748590dc7fc6b1 ] Fix the input-device not getting free-ed on probe-errors and fix the msi_touchpad_dwork not getting cancelled on neither probe-errors nor on remove. 
Fixes: 143a4c0284dc ("msi-laptop: send out touchpad on/off key") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220825141336.208597-3-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/msi-laptop.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 0960205ee49f..3e935303b143 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -1116,6 +1116,8 @@ fail_create_attr: fail_create_group: if (quirks->load_scm_model) { i8042_remove_filter(msi_laptop_i8042_filter); + cancel_delayed_work_sync(&msi_touchpad_dwork); + input_unregister_device(msi_laptop_input_dev); cancel_delayed_work_sync(&msi_rfkill_dwork); cancel_work_sync(&msi_rfkill_work); rfkill_cleanup(); @@ -1136,6 +1138,7 @@ static void __exit msi_cleanup(void) { if (quirks->load_scm_model) { i8042_remove_filter(msi_laptop_i8042_filter); + cancel_delayed_work_sync(&msi_touchpad_dwork); input_unregister_device(msi_laptop_input_dev); cancel_delayed_work_sync(&msi_rfkill_dwork); cancel_work_sync(&msi_rfkill_work); From c577b4e9722711ee4c93f738c12da32da7e36085 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 22 Aug 2022 17:42:43 -0700 Subject: [PATCH 0153/1477] drm: fix drm_mipi_dbi build errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eb7de496451bd969e203f02f66585131228ba4ae ] drm_mipi_dbi needs lots of DRM_KMS_HELPER support, so select that Kconfig symbol like it is done in most other uses, and the way that it was before MIPI_DBI was moved from tinydrm to its core location.
Fixes these build errors: ld: drivers/gpu/drm/drm_mipi_dbi.o: in function `mipi_dbi_buf_copy': drivers/gpu/drm/drm_mipi_dbi.c:205: undefined reference to `drm_gem_fb_get_obj' ld: drivers/gpu/drm/drm_mipi_dbi.c:211: undefined reference to `drm_gem_fb_begin_cpu_access' ld: drivers/gpu/drm/drm_mipi_dbi.c:215: undefined reference to `drm_gem_fb_vmap' ld: drivers/gpu/drm/drm_mipi_dbi.c:222: undefined reference to `drm_fb_swab' ld: drivers/gpu/drm/drm_mipi_dbi.c:224: undefined reference to `drm_fb_memcpy' ld: drivers/gpu/drm/drm_mipi_dbi.c:227: undefined reference to `drm_fb_xrgb8888_to_rgb565' ld: drivers/gpu/drm/drm_mipi_dbi.c:235: undefined reference to `drm_gem_fb_vunmap' ld: drivers/gpu/drm/drm_mipi_dbi.c:237: undefined reference to `drm_gem_fb_end_cpu_access' ld: drivers/gpu/drm/drm_mipi_dbi.o: in function `mipi_dbi_dev_init_with_formats': ld: drivers/gpu/drm/drm_mipi_dbi.o:/X64/../drivers/gpu/drm/drm_mipi_dbi.c:469: undefined reference to `drm_gem_fb_create_with_dirty' Fixes: 174102f4de23 ("drm/tinydrm: Move mipi-dbi") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Dillon Min Cc: Linus Walleij Cc: Sam Ravnborg Cc: Noralf Trønnes Cc: Thomas Zimmermann Cc: Thierry Reding Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Linus Walleij Link: https://patchwork.freedesktop.org/patch/msgid/20220823004243.11596-1-rdunlap@infradead.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index ca868271f4c4..4e9b3a95fa7c 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -30,6 +30,7 @@ menuconfig DRM config DRM_MIPI_DBI tristate depends on DRM + select DRM_KMS_HELPER config DRM_MIPI_DSI bool From 877e92e9b1bdeb580b31a46061005936be902cd4 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Tue, 30 Aug 2022 15:34:50 +0800 Subject: [PATCH 0154/1477] drm/bridge: megachips: Fix a null pointer dereference bug [ 
Upstream commit 1ff673333d46d2c1b053ebd0c1c7c7c79e36943e ] When removing the module we will get the following warning: [ 31.911505] i2c-core: driver [stdp2690-ge-b850v3-fw] unregistered [ 31.912484] general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN PTI [ 31.913338] KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] [ 31.915280] RIP: 0010:drm_bridge_remove+0x97/0x130 [ 31.921825] Call Trace: [ 31.922533] stdp4028_ge_b850v3_fw_remove+0x34/0x60 [megachips_stdpxxxx_ge_b850v3_fw] [ 31.923139] i2c_device_remove+0x181/0x1f0 The two bridges (stdp2690, stdp4028) do not probe at the same time, so the driver does not call ge_b850v3_register() when probing, causing the driver to try to remove the object that has not been initialized. Fix this by checking whether both the bridges are probed. Fixes: 11632d4aa2b3 ("drm/bridge: megachips: Ensure both bridges are probed before registration") Signed-off-by: Zheyu Ma Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20220830073450.1897020-1-zheyuma97@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index cce98bf2a4e7..72248a565579 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -296,7 +296,9 @@ static void ge_b850v3_lvds_remove(void) * This check is to avoid both the drivers * removing the bridge in their remove() function */ - if (!ge_b850v3_lvds_ptr) + if (!ge_b850v3_lvds_ptr || + !ge_b850v3_lvds_ptr->stdp2690_i2c || + !ge_b850v3_lvds_ptr->stdp4028_i2c) goto out; drm_bridge_remove(&ge_b850v3_lvds_ptr->bridge); From f0fb0817ebce3d80a1d1bcd4ce7693a7b6744f0b Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date:
Fri, 2 Sep 2022 09:30:30 +0800 Subject: [PATCH 0155/1477] ASoC: rsnd: Add check for rsnd_mod_power_on [ Upstream commit 376be51caf8871419bbcbb755e1e615d30dc3153 ] As rsnd_mod_power_on() can return negative numbers, it should be better to check the return value and deal with the exception. Fixes: e7d850dd10f4 ("ASoC: rsnd: use mod base common method on SSI-parent") Signed-off-by: Jiasheng Jiang Acked-by: Kuninori Morimoto Link: https://lore.kernel.org/r/20220902013030.3691266-1-jiasheng@iscas.ac.cn Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/ctu.c | 6 +++++- sound/soc/sh/rcar/dvc.c | 6 +++++- sound/soc/sh/rcar/mix.c | 6 +++++- sound/soc/sh/rcar/src.c | 5 ++++- sound/soc/sh/rcar/ssi.c | 4 +++- 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/sound/soc/sh/rcar/ctu.c b/sound/soc/sh/rcar/ctu.c index 7647b3d4c0ba..25a8cfc27433 100644 --- a/sound/soc/sh/rcar/ctu.c +++ b/sound/soc/sh/rcar/ctu.c @@ -171,7 +171,11 @@ static int rsnd_ctu_init(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) { - rsnd_mod_power_on(mod); + int ret; + + ret = rsnd_mod_power_on(mod); + if (ret < 0) + return ret; rsnd_ctu_activation(mod); diff --git a/sound/soc/sh/rcar/dvc.c b/sound/soc/sh/rcar/dvc.c index 8d91c0eb0880..53b2ad01222b 100644 --- a/sound/soc/sh/rcar/dvc.c +++ b/sound/soc/sh/rcar/dvc.c @@ -186,7 +186,11 @@ static int rsnd_dvc_init(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) { - rsnd_mod_power_on(mod); + int ret; + + ret = rsnd_mod_power_on(mod); + if (ret < 0) + return ret; rsnd_dvc_activation(mod); diff --git a/sound/soc/sh/rcar/mix.c b/sound/soc/sh/rcar/mix.c index a3e0370f5704..c6fe2595c373 100644 --- a/sound/soc/sh/rcar/mix.c +++ b/sound/soc/sh/rcar/mix.c @@ -146,7 +146,11 @@ static int rsnd_mix_init(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) { - rsnd_mod_power_on(mod); + int ret; + + ret = rsnd_mod_power_on(mod); + if (ret < 0) + return ret; 
rsnd_mix_activation(mod); diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index 585ffba0244b..fd52e26a3808 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -454,11 +454,14 @@ static int rsnd_src_init(struct rsnd_mod *mod, struct rsnd_priv *priv) { struct rsnd_src *src = rsnd_mod_to_src(mod); + int ret; /* reset sync convert_rate */ src->sync.val = 0; - rsnd_mod_power_on(mod); + ret = rsnd_mod_power_on(mod); + if (ret < 0) + return ret; rsnd_src_activation(mod); diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 042207c11651..2ead44779d46 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -518,7 +518,9 @@ static int rsnd_ssi_init(struct rsnd_mod *mod, ssi->usrcnt++; - rsnd_mod_power_on(mod); + ret = rsnd_mod_power_on(mod); + if (ret < 0) + return ret; rsnd_ssi_config_init(mod, io); From cbe37857dda1ceadea4ecc036ab7c6330a00777b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Sep 2022 11:23:06 +0200 Subject: [PATCH 0156/1477] ALSA: hda: beep: Simplify keep-power-at-enable behavior [ Upstream commit 4c8d695cb9bc5f6fd298a586602947b2fc099a64 ] The recent fix for IDT codecs to keep the power up while the beep is enabled can be better integrated into the beep helper code. This patch cleans up the code with refactoring. 
Fixes: 414d38ba8710 ("ALSA: hda/sigmatel: Keep power up while beep is enabled") Link: https://lore.kernel.org/r/20220906092306.26183-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_beep.c | 15 +++++++++++++-- sound/pci/hda/hda_beep.h | 1 + sound/pci/hda/patch_sigmatel.c | 25 ++----------------------- 3 files changed, 16 insertions(+), 25 deletions(-) diff --git a/sound/pci/hda/hda_beep.c b/sound/pci/hda/hda_beep.c index 53a2b89f8983..e63621bcb214 100644 --- a/sound/pci/hda/hda_beep.c +++ b/sound/pci/hda/hda_beep.c @@ -118,6 +118,12 @@ static int snd_hda_beep_event(struct input_dev *dev, unsigned int type, return 0; } +static void turn_on_beep(struct hda_beep *beep) +{ + if (beep->keep_power_at_enable) + snd_hda_power_up_pm(beep->codec); +} + static void turn_off_beep(struct hda_beep *beep) { cancel_work_sync(&beep->beep_work); @@ -125,6 +131,8 @@ static void turn_off_beep(struct hda_beep *beep) /* turn off beep */ generate_tone(beep, 0); } + if (beep->keep_power_at_enable) + snd_hda_power_down_pm(beep->codec); } /** @@ -140,7 +148,9 @@ int snd_hda_enable_beep_device(struct hda_codec *codec, int enable) enable = !!enable; if (beep->enabled != enable) { beep->enabled = enable; - if (!enable) + if (enable) + turn_on_beep(beep); + else turn_off_beep(beep); return 1; } @@ -167,7 +177,8 @@ static int beep_dev_disconnect(struct snd_device *device) input_unregister_device(beep->dev); else input_free_device(beep->dev); - turn_off_beep(beep); + if (beep->enabled) + turn_off_beep(beep); return 0; } diff --git a/sound/pci/hda/hda_beep.h b/sound/pci/hda/hda_beep.h index a25358a4807a..db76e3ddba65 100644 --- a/sound/pci/hda/hda_beep.h +++ b/sound/pci/hda/hda_beep.h @@ -25,6 +25,7 @@ struct hda_beep { unsigned int enabled:1; unsigned int linear_tone:1; /* linear tone for IDT/STAC codec */ unsigned int playing:1; + unsigned int keep_power_at_enable:1; /* set by driver */ struct work_struct beep_work; /* scheduled task for beep event */ 
struct mutex mutex; void (*power_hook)(struct hda_beep *beep, bool on); diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index b848e435b93f..6fc0c4e77cd1 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -4308,6 +4308,8 @@ static int stac_parse_auto_config(struct hda_codec *codec) if (codec->beep) { /* IDT/STAC codecs have linear beep tone parameter */ codec->beep->linear_tone = spec->linear_tone_beep; + /* keep power up while beep is enabled */ + codec->beep->keep_power_at_enable = 1; /* if no beep switch is available, make its own one */ caps = query_amp_caps(codec, nid, HDA_OUTPUT); if (!(caps & AC_AMPCAP_MUTE)) { @@ -4448,28 +4450,6 @@ static int stac_suspend(struct hda_codec *codec) stac_shutup(codec); return 0; } - -static int stac_check_power_status(struct hda_codec *codec, hda_nid_t nid) -{ -#ifdef CONFIG_SND_HDA_INPUT_BEEP - struct sigmatel_spec *spec = codec->spec; -#endif - int ret = snd_hda_gen_check_power_status(codec, nid); - -#ifdef CONFIG_SND_HDA_INPUT_BEEP - if (nid == spec->gen.beep_nid && codec->beep) { - if (codec->beep->enabled != spec->beep_power_on) { - spec->beep_power_on = codec->beep->enabled; - if (spec->beep_power_on) - snd_hda_power_up_pm(codec); - else - snd_hda_power_down_pm(codec); - } - ret |= spec->beep_power_on; - } -#endif - return ret; -} #else #define stac_suspend NULL #endif /* CONFIG_PM */ @@ -4482,7 +4462,6 @@ static const struct hda_codec_ops stac_patch_ops = { .unsol_event = snd_hda_jack_unsol_event, #ifdef CONFIG_PM .suspend = stac_suspend, - .check_power_status = stac_check_power_status, #endif .reboot_notify = stac_shutup, }; From 1f340e1c1c74d11c45a6e32663829b26acd4f47b Mon Sep 17 00:00:00 2001 From: Liang He Date: Fri, 22 Jul 2022 22:43:48 +0800 Subject: [PATCH 0157/1477] drm/omap: dss: Fix refcount leak bugs [ Upstream commit 8b42057e62120813ebe9274f508fa785b7cab33a ] In dss_init_ports() and __dss_uninit_ports(), we should call of_node_put() for the 
reference returned by of_graph_get_port_by_id() in fail path or when it is not used anymore. Fixes: 09bffa6e5192 ("drm: omap: use common OF graph helpers") Signed-off-by: Liang He Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20220722144348.1306569-1-windhl@126.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/omapdrm/dss/dss.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c index 6ccbc29c4ce4..d5b3123ed081 100644 --- a/drivers/gpu/drm/omapdrm/dss/dss.c +++ b/drivers/gpu/drm/omapdrm/dss/dss.c @@ -1173,6 +1173,7 @@ static void __dss_uninit_ports(struct dss_device *dss, unsigned int num_ports) default: break; } + of_node_put(port); } } @@ -1205,11 +1206,13 @@ static int dss_init_ports(struct dss_device *dss) default: break; } + of_node_put(port); } return 0; error: + of_node_put(port); __dss_uninit_ports(dss, i); return r; } From ad0b8ed172a15ae6e143b6a5dcea30f4c96f8cbe Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 25 Aug 2022 09:33:57 +0200 Subject: [PATCH 0158/1477] mmc: au1xmmc: Fix an error handling path in au1xmmc_probe() [ Upstream commit 5cbedf52608cc3cbc1c2a9a861fb671620427a20 ] If clk_prepare_enable() fails, there is no point in calling clk_disable_unprepare() in the error handling path. Move the out_clk label at the right place. 
Fixes: b6507596dfd6 ("MIPS: Alchemy: au1xmmc: use clk framework") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/21d99886d07fa7fcbec74992657dabad98c935c4.1661412818.git.christophe.jaillet@wanadoo.fr Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/au1xmmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c index bd00515fbaba..56a3bf51d446 100644 --- a/drivers/mmc/host/au1xmmc.c +++ b/drivers/mmc/host/au1xmmc.c @@ -1097,8 +1097,9 @@ out5: if (host->platdata && host->platdata->cd_setup && !(mmc->caps & MMC_CAP_NEEDS_POLL)) host->platdata->cd_setup(mmc, 0); -out_clk: + clk_disable_unprepare(host->clk); +out_clk: clk_put(host->clk); out_irq: free_irq(host->irq, host); From a9a60d64057245700513a9ed4651fa81ec306e37 Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 14 Sep 2022 21:43:54 +0800 Subject: [PATCH 0159/1477] ASoC: eureka-tlv320: Hold reference returned from of_find_xxx API [ Upstream commit bfb735a3ceff0bab6473bac275da96f9b2a06dec ] In eukrea_tlv320_probe(), we need to hold the reference returned from of_find_compatible_node() which has increased the refcount and then call of_node_put() with it when done. 
Fixes: 66f232908de2 ("ASoC: eukrea-tlv320: Add DT support.") Co-authored-by: Kelin Wang Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220914134354.3995587-1-windhl@126.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/eukrea-tlv320.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/soc/fsl/eukrea-tlv320.c b/sound/soc/fsl/eukrea-tlv320.c index e13271ea84de..29cf9234984d 100644 --- a/sound/soc/fsl/eukrea-tlv320.c +++ b/sound/soc/fsl/eukrea-tlv320.c @@ -86,7 +86,7 @@ static int eukrea_tlv320_probe(struct platform_device *pdev) int ret; int int_port = 0, ext_port; struct device_node *np = pdev->dev.of_node; - struct device_node *ssi_np = NULL, *codec_np = NULL; + struct device_node *ssi_np = NULL, *codec_np = NULL, *tmp_np = NULL; eukrea_tlv320.dev = &pdev->dev; if (np) { @@ -143,7 +143,7 @@ static int eukrea_tlv320_probe(struct platform_device *pdev) } if (machine_is_eukrea_cpuimx27() || - of_find_compatible_node(NULL, NULL, "fsl,imx21-audmux")) { + (tmp_np = of_find_compatible_node(NULL, NULL, "fsl,imx21-audmux"))) { imx_audmux_v1_configure_port(MX27_AUDMUX_HPCR1_SSI0, IMX_AUDMUX_V1_PCR_SYN | IMX_AUDMUX_V1_PCR_TFSDIR | @@ -158,10 +158,11 @@ static int eukrea_tlv320_probe(struct platform_device *pdev) IMX_AUDMUX_V1_PCR_SYN | IMX_AUDMUX_V1_PCR_RXDSEL(MX27_AUDMUX_HPCR1_SSI0) ); + of_node_put(tmp_np); } else if (machine_is_eukrea_cpuimx25sd() || machine_is_eukrea_cpuimx35sd() || machine_is_eukrea_cpuimx51sd() || - of_find_compatible_node(NULL, NULL, "fsl,imx31-audmux")) { + (tmp_np = of_find_compatible_node(NULL, NULL, "fsl,imx31-audmux"))) { if (!np) ext_port = machine_is_eukrea_cpuimx25sd() ? 
4 : 3; @@ -178,6 +179,7 @@ static int eukrea_tlv320_probe(struct platform_device *pdev) IMX_AUDMUX_V2_PTCR_SYN, IMX_AUDMUX_V2_PDCR_RXDSEL(int_port) ); + of_node_put(tmp_np); } else { if (np) { /* The eukrea,asoc-tlv320 driver was explicitly From 598d8f7d86f1405fff8ac38a0912be9a85b00251 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 15 Jun 2022 15:57:01 +0300 Subject: [PATCH 0160/1477] drm/msm/dpu: index dpu_kms->hw_vbif using vbif_idx [ Upstream commit 7538f80ae0d98bf51eb89eee5344aec219902d42 ] Remove loops over hw_vbif. Instead always VBIF's idx as an index in the array. This fixes an error in dpu_kms_hw_init(), where we fill dpu_kms->hw_vbif[i], but check for an error pointer at dpu_kms->hw_vbif[vbif_idx]. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/489569/ Link: https://lore.kernel.org/r/20220615125703.24647-1-dmitry.baryshkov@linaro.org Signed-off-by: Dmitry Baryshkov Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 12 ++++------ drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c | 29 +++++++++++------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 7503f093f3b6..b7841f7fc10a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -675,12 +675,10 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) _dpu_kms_mmu_destroy(dpu_kms); if (dpu_kms->catalog) { - for (i = 0; i < dpu_kms->catalog->vbif_count; i++) { - u32 vbif_idx = dpu_kms->catalog->vbif[i].id; - - if ((vbif_idx < VBIF_MAX) && dpu_kms->hw_vbif[vbif_idx]) { - dpu_hw_vbif_destroy(dpu_kms->hw_vbif[vbif_idx]); - dpu_kms->hw_vbif[vbif_idx] = NULL; + for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) { + if (dpu_kms->hw_vbif[i]) { + dpu_hw_vbif_destroy(dpu_kms->hw_vbif[i]); + 
dpu_kms->hw_vbif[i] = NULL; } } } @@ -987,7 +985,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) for (i = 0; i < dpu_kms->catalog->vbif_count; i++) { u32 vbif_idx = dpu_kms->catalog->vbif[i].id; - dpu_kms->hw_vbif[i] = dpu_hw_vbif_init(vbif_idx, + dpu_kms->hw_vbif[vbif_idx] = dpu_hw_vbif_init(vbif_idx, dpu_kms->vbif[vbif_idx], dpu_kms->catalog); if (IS_ERR_OR_NULL(dpu_kms->hw_vbif[vbif_idx])) { rc = PTR_ERR(dpu_kms->hw_vbif[vbif_idx]); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c index 5e8c3f3e6625..fc86d34aec80 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c @@ -11,6 +11,14 @@ #include "dpu_hw_vbif.h" #include "dpu_trace.h" +static struct dpu_hw_vbif *dpu_get_vbif(struct dpu_kms *dpu_kms, enum dpu_vbif vbif_idx) +{ + if (vbif_idx < ARRAY_SIZE(dpu_kms->hw_vbif)) + return dpu_kms->hw_vbif[vbif_idx]; + + return NULL; +} + /** * _dpu_vbif_wait_for_xin_halt - wait for the xin to halt * @vbif: Pointer to hardware vbif driver @@ -148,20 +156,15 @@ exit: void dpu_vbif_set_ot_limit(struct dpu_kms *dpu_kms, struct dpu_vbif_set_ot_params *params) { - struct dpu_hw_vbif *vbif = NULL; + struct dpu_hw_vbif *vbif; struct dpu_hw_mdp *mdp; bool forced_on = false; u32 ot_lim; - int ret, i; + int ret; mdp = dpu_kms->hw_mdp; - for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) { - if (dpu_kms->hw_vbif[i] && - dpu_kms->hw_vbif[i]->idx == params->vbif_idx) - vbif = dpu_kms->hw_vbif[i]; - } - + vbif = dpu_get_vbif(dpu_kms, params->vbif_idx); if (!vbif || !mdp) { DPU_DEBUG("invalid arguments vbif %d mdp %d\n", vbif != NULL, mdp != NULL); @@ -204,7 +207,7 @@ void dpu_vbif_set_ot_limit(struct dpu_kms *dpu_kms, void dpu_vbif_set_qos_remap(struct dpu_kms *dpu_kms, struct dpu_vbif_set_qos_params *params) { - struct dpu_hw_vbif *vbif = NULL; + struct dpu_hw_vbif *vbif; struct dpu_hw_mdp *mdp; bool forced_on = false; const struct dpu_vbif_qos_tbl *qos_tbl; @@ -216,13 +219,7 @@ void 
dpu_vbif_set_qos_remap(struct dpu_kms *dpu_kms, } mdp = dpu_kms->hw_mdp; - for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) { - if (dpu_kms->hw_vbif[i] && - dpu_kms->hw_vbif[i]->idx == params->vbif_idx) { - vbif = dpu_kms->hw_vbif[i]; - break; - } - } + vbif = dpu_get_vbif(dpu_kms, params->vbif_idx); if (!vbif || !vbif->cap) { DPU_ERROR("invalid vbif %d\n", params->vbif_idx); From ef59819976da26e71b885cd43e2b5cf30da2d4d8 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Wed, 24 Aug 2022 13:15:50 -0700 Subject: [PATCH 0161/1477] drm/msm/dp: correct 1.62G link rate at dp_catalog_ctrl_config_msa() [ Upstream commit aa0bff10af1c4b92e6b56e3e1b7f81c660d3ba78 ] At current implementation there is an extra 0 at 1.62G link rate which cause no correct pixel_div selected for 1.62G link rate to calculate mvid and nvid. This patch delete the extra 0 to have mvid and nvid be calculated correctly. Changes in v2: -- fix Fixes tag's text Changes in v3: -- fix misspelling of "Reviewed-by" Fixes: 937f941ca06f ("drm/msm/dp: Use qmp phy for DP PLL and PHY") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/499328/ Link: https://lore.kernel.org/r/1661372150-3764-1-git-send-email-quic_khsieh@quicinc.com [DB: rewrapped commit message] Signed-off-by: Dmitry Baryshkov Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_catalog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index 2da6982efdbf..613348b022fe 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -416,7 +416,7 @@ void dp_catalog_ctrl_config_msa(struct dp_catalog *dp_catalog, if (rate == link_rate_hbr3) pixel_div = 6; - else if (rate == 1620000 || rate == 270000) + else if (rate == 162000 || rate == 270000) pixel_div = 2; else if (rate == link_rate_hbr2) pixel_div = 4; From 
4993c1511d66326f1037bc5156b024a6a96d23ef Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 22 Sep 2022 21:44:57 +0200 Subject: [PATCH 0162/1477] ASoC: da7219: Fix an error handling path in da7219_register_dai_clks() [ Upstream commit abb4e4349afe7eecdb0499582f1c777031e3a7c8 ] If clk_hw_register() fails, the corresponding clk should not be unregistered. To handle errors from loops, clean up partial iterations before doing the goto. So add a clk_hw_unregister(). Then use a while (--i >= 0) loop in the unwind section. Fixes: 78013a1cf297 ("ASoC: da7219: Fix clock handling around codec level probe") Reported-by: Dan Carpenter Signed-off-by: Christophe JAILLET Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/e4acceab57a0d9e477a8d5890a45c5309e553e7c.1663875789.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/da7219.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/da7219.c b/sound/soc/codecs/da7219.c index 5f8c96dea094..f9e58d6509a8 100644 --- a/sound/soc/codecs/da7219.c +++ b/sound/soc/codecs/da7219.c @@ -2194,6 +2194,7 @@ static int da7219_register_dai_clks(struct snd_soc_component *component) dai_clk_lookup = clkdev_hw_create(dai_clk_hw, init.name, "%s", dev_name(dev)); if (!dai_clk_lookup) { + clk_hw_unregister(dai_clk_hw); ret = -ENOMEM; goto err; } else { @@ -2215,12 +2216,12 @@ static int da7219_register_dai_clks(struct snd_soc_component *component) return 0; err: - do { + while (--i >= 0) { if (da7219->dai_clks_lookup[i]) clkdev_drop(da7219->dai_clks_lookup[i]); clk_hw_unregister(&da7219->dai_clks_hw[i]); - } while (i-- > 0); + } if (np) kfree(da7219->clk_hw_data); From c940636d9c745b1129f7e8d51c16a1b1669c1e40 Mon Sep 17 00:00:00 2001 From: Andreas Pape Date: Mon, 26 Sep 2022 18:58:13 +0200 Subject: [PATCH 0163/1477] ALSA: dmaengine: increment buffer pointer atomically [ Upstream commit d1c442019594692c64a70a86ad88eb5b6db92216 ] Setting pointer 
and afterwards checking for wraparound leads to the possibility of returning the inconsistent pointer position. This patch increments buffer pointer atomically to avoid this issue. Fixes: e7f73a1613567a ("ASoC: Add dmaengine PCM helper functions") Signed-off-by: Andreas Pape Signed-off-by: Eugeniu Rosca Link: https://lore.kernel.org/r/1664211493-11789-1-git-send-email-erosca@de.adit-jv.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/pcm_dmaengine.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c index 4d0e8fe535a1..be58505889a3 100644 --- a/sound/core/pcm_dmaengine.c +++ b/sound/core/pcm_dmaengine.c @@ -130,12 +130,14 @@ EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_set_config_from_dai_data); static void dmaengine_pcm_dma_complete(void *arg) { + unsigned int new_pos; struct snd_pcm_substream *substream = arg; struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream); - prtd->pos += snd_pcm_lib_period_bytes(substream); - if (prtd->pos >= snd_pcm_lib_buffer_bytes(substream)) - prtd->pos = 0; + new_pos = prtd->pos + snd_pcm_lib_period_bytes(substream); + if (new_pos >= snd_pcm_lib_buffer_bytes(substream)) + new_pos = 0; + prtd->pos = new_pos; snd_pcm_period_elapsed(substream); } From 71704c2e1b2c159db0d6f90e2108a72eb342f101 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 22 Sep 2022 21:06:40 +0200 Subject: [PATCH 0164/1477] mmc: wmt-sdmmc: Fix an error handling path in wmt_mci_probe() [ Upstream commit cb58188ad90a61784a56a64f5107faaf2ad323e7 ] A dma_free_coherent() call is missing in the error handling path of the probe, as already done in the remove function. 
Fixes: 3a96dff0f828 ("mmc: SD/MMC Host Controller for Wondermedia WM8505/WM8650") Signed-off-by: Christophe JAILLET Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/53fc6ffa5d1c428fefeae7d313cf4a669c3a1e98.1663873255.git.christophe.jaillet@wanadoo.fr Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/wmt-sdmmc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c index cf10949fb0ac..8df722ec57ed 100644 --- a/drivers/mmc/host/wmt-sdmmc.c +++ b/drivers/mmc/host/wmt-sdmmc.c @@ -849,7 +849,7 @@ static int wmt_mci_probe(struct platform_device *pdev) if (IS_ERR(priv->clk_sdmmc)) { dev_err(&pdev->dev, "Error getting clock\n"); ret = PTR_ERR(priv->clk_sdmmc); - goto fail5; + goto fail5_and_a_half; } ret = clk_prepare_enable(priv->clk_sdmmc); @@ -866,6 +866,9 @@ static int wmt_mci_probe(struct platform_device *pdev) return 0; fail6: clk_put(priv->clk_sdmmc); +fail5_and_a_half: + dma_free_coherent(&pdev->dev, mmc->max_blk_count * 16, + priv->dma_desc_buffer, priv->dma_desc_device_addr); fail5: free_irq(dma_irq, priv); fail4: From c1b269dda1e747746f0b13c2fd6296aa4992bc8d Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Thu, 29 Sep 2022 00:01:13 +0800 Subject: [PATCH 0165/1477] ASoC: wm8997: Fix PM disable depth imbalance in wm8997_probe [ Upstream commit 41a736ac20602f64773e80f0f5b32cde1830a44a ] The pm_runtime_enable will increase power disable depth. Thus a pairing decrement is needed on the error handling path to keep it balanced according to context. 
We fix it by moving pm_runtime_enable to the end of wm8997_probe. Fixes: 40843aea5a9bd ("ASoC: wm8997: Initial CODEC driver") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20220928160116.125020-2-zhangqilong3@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8997.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm8997.c b/sound/soc/codecs/wm8997.c index 229f2986cd96..07378714b013 100644 --- a/sound/soc/codecs/wm8997.c +++ b/sound/soc/codecs/wm8997.c @@ -1156,9 +1156,6 @@ static int wm8997_probe(struct platform_device *pdev) regmap_update_bits(arizona->regmap, wm8997_digital_vu[i], WM8997_DIG_VU, WM8997_DIG_VU); - pm_runtime_enable(&pdev->dev); - pm_runtime_idle(&pdev->dev); - arizona_init_common(arizona); ret = arizona_init_vol_limit(arizona); @@ -1177,6 +1174,9 @@ static int wm8997_probe(struct platform_device *pdev) goto err_spk_irqs; } + pm_runtime_enable(&pdev->dev); + pm_runtime_idle(&pdev->dev); + return ret; err_spk_irqs: From fb23569699359d9fba5bf1257da0445c8776de29 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Thu, 29 Sep 2022 00:01:14 +0800 Subject: [PATCH 0166/1477] ASoC: wm5110: Fix PM disable depth imbalance in wm5110_probe [ Upstream commit 86b46bf1feb83898d89a2b4a8d08d21e9ea277a7 ] The pm_runtime_enable will increase power disable depth. Thus a pairing decrement is needed on the error handling path to keep it balanced according to context. We fix it by moving pm_runtime_enable to the end of wm5110_probe.
Fixes: 5c6af635fd772 ("ASoC: wm5110: Add audio CODEC driver") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20220928160116.125020-3-zhangqilong3@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm5110.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index d0cef982215d..c158f8b1e8e4 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -2452,9 +2452,6 @@ static int wm5110_probe(struct platform_device *pdev) regmap_update_bits(arizona->regmap, wm5110_digital_vu[i], WM5110_DIG_VU, WM5110_DIG_VU); - pm_runtime_enable(&pdev->dev); - pm_runtime_idle(&pdev->dev); - ret = arizona_request_irq(arizona, ARIZONA_IRQ_DSP_IRQ1, "ADSP2 Compressed IRQ", wm5110_adsp2_irq, wm5110); @@ -2487,6 +2484,9 @@ static int wm5110_probe(struct platform_device *pdev) goto err_spk_irqs; } + pm_runtime_enable(&pdev->dev); + pm_runtime_idle(&pdev->dev); + return ret; err_spk_irqs: From 37e3e01c9a78e9ef18f54c92aed34dda8b4a95f9 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Thu, 29 Sep 2022 00:01:15 +0800 Subject: [PATCH 0167/1477] ASoC: wm5102: Fix PM disable depth imbalance in wm5102_probe [ Upstream commit fcbb60820cd3008bb44334a0395e5e57ccb77329 ] The pm_runtime_enable will increase power disable depth. Thus a pairing decrement is needed on the error handling path to keep it balanced according to context. We fix it by moving pm_runtime_enable to the end of wm5102_probe.
Fixes: 93e8791dd34ca ("ASoC: wm5102: Initial driver") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20220928160116.125020-4-zhangqilong3@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm5102.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index 2ed3fa67027d..b7f5e5391fdb 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -2083,9 +2083,6 @@ static int wm5102_probe(struct platform_device *pdev) regmap_update_bits(arizona->regmap, wm5102_digital_vu[i], WM5102_DIG_VU, WM5102_DIG_VU); - pm_runtime_enable(&pdev->dev); - pm_runtime_idle(&pdev->dev); - ret = arizona_request_irq(arizona, ARIZONA_IRQ_DSP_IRQ1, "ADSP2 Compressed IRQ", wm5102_adsp2_irq, wm5102); @@ -2118,6 +2115,9 @@ static int wm5102_probe(struct platform_device *pdev) goto err_spk_irqs; } + pm_runtime_enable(&pdev->dev); + pm_runtime_idle(&pdev->dev); + return ret; err_spk_irqs: From f182de42d786becce464ae7a1e7eca3fcbfc2469 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Thu, 29 Sep 2022 00:01:16 +0800 Subject: [PATCH 0168/1477] ASoC: mt6660: Fix PM disable depth imbalance in mt6660_i2c_probe [ Upstream commit b73f11e895e140537e7f8c7251211ccd3ce0782b ] The pm_runtime_enable will increase power disable depth. Thus a pairing decrement is needed on the error handling path to keep it balanced according to context. We fix it by moving pm_runtime_enable to the end of mt6660_i2c_probe.
Fixes: f289e55c6eeb4 ("ASoC: Add MediaTek MT6660 Speaker Amp Driver") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20220928160116.125020-5-zhangqilong3@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/mt6660.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/mt6660.c b/sound/soc/codecs/mt6660.c index d1797003c83d..e18a58868273 100644 --- a/sound/soc/codecs/mt6660.c +++ b/sound/soc/codecs/mt6660.c @@ -504,13 +504,17 @@ static int mt6660_i2c_probe(struct i2c_client *client, dev_err(chip->dev, "read chip revision fail\n"); goto probe_fail; } - pm_runtime_set_active(chip->dev); - pm_runtime_enable(chip->dev); ret = devm_snd_soc_register_component(chip->dev, &mt6660_component_driver, &mt6660_codec_dai, 1); + if (!ret) { + pm_runtime_set_active(chip->dev); + pm_runtime_enable(chip->dev); + } + return ret; + probe_fail: _mt6660_chip_power_on(chip, 0); mutex_destroy(&chip->io_lock); From 2c442b0c0624c56f55d30bcb00e1b9d5cc6787c1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 1 Oct 2022 09:48:10 +0200 Subject: [PATCH 0169/1477] ALSA: hda/hdmi: Don't skip notification handling during PM operation [ Upstream commit 5226c7b9784eee215e3914f440b3c2e1764f67a8 ] The HDMI driver skips the notification handling from the graphics driver when the codec driver is being in the PM operation. This behavior was introduced by the commit eb399d3c99d8 ("ALSA: hda - Skip ELD notification during PM process"). This skip may cause a problem, as we may miss the ELD update when the connection/disconnection happens right at the runtime-PM operation of the audio codec. Although this workaround was valid at that time, it's no longer true; the fix was required just because the ELD update procedure needed to wake up the audio codec, which had led to a runtime-resume during a runtime-suspend.
Meanwhile, the ELD update procedure doesn't need a codec wake up any longer since the commit 788d441a164c ("ALSA: hda - Use component ops for i915 HDMI/DP audio jack handling"); i.e. there is not much reason for skipping the notification. Let's drop those checks for addressing the missing notification. Fixes: 788d441a164c ("ALSA: hda - Use component ops for i915 HDMI/DP audio jack handling") Reported-by: Brent Lu Link: https://lore.kernel.org/r/20220927135807.4097052-1-brent.lu@intel.com Link: https://lore.kernel.org/r/20221001074809.7461-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_hdmi.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index c3fcf478037f..b1c57c65f6cd 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2684,9 +2684,6 @@ static void generic_acomp_pin_eld_notify(void *audio_ptr, int port, int dev_id) */ if (codec->core.dev.power.power_state.event == PM_EVENT_SUSPEND) return; - /* ditto during suspend/resume process itself */ - if (snd_hdac_is_in_pm(&codec->core)) - return; check_presence_and_report(codec, pin_nid, dev_id); } @@ -2870,9 +2867,6 @@ static void intel_pin_eld_notify(void *audio_ptr, int port, int pipe) */ if (codec->core.dev.power.power_state.event == PM_EVENT_SUSPEND) return; - /* ditto during suspend/resume process itself */ - if (snd_hdac_is_in_pm(&codec->core)) - return; snd_hdac_i915_set_bclk(&codec->bus->core); check_presence_and_report(codec, pin_nid, dev_id); From fde46754d5483bc398018bbec3c8ef5c55219e67 Mon Sep 17 00:00:00 2001 From: Liang He Date: Sat, 16 Jul 2022 11:13:24 +0800 Subject: [PATCH 0170/1477] memory: pl353-smc: Fix refcount leak bug in pl353_smc_probe() [ Upstream commit 61b3c876c1cbdb1efd1f52a1f348580e6e14efb6 ] The break of for_each_available_child_of_node() needs a corresponding of_node_put() when the reference 'child' is not used anymore.
Here we do not need to call of_node_put() in fail path as '!match' means no break. While of_platform_device_create() will create a new reference to 'child', it already takes care of the refcounting itself. Fixes: fee10bd22678 ("memory: pl353: Add driver for arm pl353 static memory controller") Signed-off-by: Liang He Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220716031324.447680-1-windhl@126.com Signed-off-by: Sasha Levin --- drivers/memory/pl353-smc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/memory/pl353-smc.c b/drivers/memory/pl353-smc.c index b0b251bb207f..1a6964f1ba6a 100644 --- a/drivers/memory/pl353-smc.c +++ b/drivers/memory/pl353-smc.c @@ -416,6 +416,7 @@ static int pl353_smc_probe(struct amba_device *adev, const struct amba_id *id) if (init) init(adev, child); of_platform_device_create(child, NULL, &adev->dev); + of_node_put(child); return 0; From daaec4b3fe2297b022c6b2d6bf48b6e5265a60b9 Mon Sep 17 00:00:00 2001 From: Liang He Date: Tue, 19 Jul 2022 16:56:39 +0800 Subject: [PATCH 0171/1477] memory: of: Fix refcount leak bug in of_get_ddr_timings() [ Upstream commit 05215fb32010d4afb68fbdbb4d237df6e2d4567b ] We should add the of_node_put() when breaking out of for_each_child_of_node() as it will automatically increase and decrease the refcount.
Fixes: e6b42eb6a66c ("memory: emif: add device tree support to emif driver") Signed-off-by: Liang He Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220719085640.1210583-1-windhl@126.com Signed-off-by: Sasha Levin --- drivers/memory/of_memory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/memory/of_memory.c b/drivers/memory/of_memory.c index d9f5437d3bce..d0a80aefdea8 100644 --- a/drivers/memory/of_memory.c +++ b/drivers/memory/of_memory.c @@ -134,6 +134,7 @@ const struct lpddr2_timings *of_get_ddr_timings(struct device_node *np_ddr, for_each_child_of_node(np_ddr, np_tim) { if (of_device_is_compatible(np_tim, tim_compat)) { if (of_do_get_timings(np_tim, &timings[i])) { + of_node_put(np_tim); devm_kfree(dev, timings); goto default_timings; } From 1d312c12c91f831fcc48623c921f2d4560edb159 Mon Sep 17 00:00:00 2001 From: Liang He Date: Tue, 19 Jul 2022 16:56:40 +0800 Subject: [PATCH 0172/1477] memory: of: Fix refcount leak bug in of_lpddr3_get_ddr_timings() [ Upstream commit 48af14fb0eaa63d9aa68f59fb0b205ec55a95636 ] We should add the of_node_put() when breaking out of for_each_child_of_node() as it will automatically increase and decrease the refcount. 
Fixes: 976897dd96db ("memory: Extend of_memory with LPDDR3 support") Signed-off-by: Liang He Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220719085640.1210583-2-windhl@126.com Signed-off-by: Sasha Levin --- drivers/memory/of_memory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/memory/of_memory.c b/drivers/memory/of_memory.c index d0a80aefdea8..1791614f324b 100644 --- a/drivers/memory/of_memory.c +++ b/drivers/memory/of_memory.c @@ -283,6 +283,7 @@ const struct lpddr3_timings if (of_device_is_compatible(np_tim, tim_compat)) { if (of_lpddr3_do_get_timings(np_tim, &timings[i])) { devm_kfree(dev, timings); + of_node_put(np_tim); goto default_timings; } i++; From 39781c98ad46b4e85053345dff797240c1ed7935 Mon Sep 17 00:00:00 2001 From: Liang He Date: Thu, 21 Jul 2022 21:52:16 +0800 Subject: [PATCH 0173/1477] soc: qcom: smsm: Fix refcount leak bugs in qcom_smsm_probe() [ Upstream commit af8f6f39b8afd772fda4f8e61823ef8c021bf382 ] There are two refcount leak bugs in qcom_smsm_probe(): (1) The 'local_node' is escaped out from for_each_child_of_node() as the break of iteration, we should call of_node_put() for it in error path or when it is not used anymore. (2) The 'node' is escaped out from for_each_available_child_of_node() as the 'goto', we should call of_node_put() for it in goto target. 
Fixes: c97c4090ff72 ("soc: qcom: smsm: Add driver for Qualcomm SMSM") Signed-off-by: Liang He Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220721135217.1301039-1-windhl@126.com Signed-off-by: Sasha Levin --- drivers/soc/qcom/smsm.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/soc/qcom/smsm.c b/drivers/soc/qcom/smsm.c index 6564f15c5319..acba67dfbc85 100644 --- a/drivers/soc/qcom/smsm.c +++ b/drivers/soc/qcom/smsm.c @@ -511,7 +511,7 @@ static int qcom_smsm_probe(struct platform_device *pdev) for (id = 0; id < smsm->num_hosts; id++) { ret = smsm_parse_ipc(smsm, id); if (ret < 0) - return ret; + goto out_put; } /* Acquire the main SMSM state vector */ @@ -519,13 +519,14 @@ static int qcom_smsm_probe(struct platform_device *pdev) smsm->num_entries * sizeof(u32)); if (ret < 0 && ret != -EEXIST) { dev_err(&pdev->dev, "unable to allocate shared state entry\n"); - return ret; + goto out_put; } states = qcom_smem_get(QCOM_SMEM_HOST_ANY, SMEM_SMSM_SHARED_STATE, NULL); if (IS_ERR(states)) { dev_err(&pdev->dev, "Unable to acquire shared state entry\n"); - return PTR_ERR(states); + ret = PTR_ERR(states); + goto out_put; } /* Acquire the list of interrupt mask vectors */ @@ -533,13 +534,14 @@ static int qcom_smsm_probe(struct platform_device *pdev) ret = qcom_smem_alloc(QCOM_SMEM_HOST_ANY, SMEM_SMSM_CPU_INTR_MASK, size); if (ret < 0 && ret != -EEXIST) { dev_err(&pdev->dev, "unable to allocate smsm interrupt mask\n"); - return ret; + goto out_put; } intr_mask = qcom_smem_get(QCOM_SMEM_HOST_ANY, SMEM_SMSM_CPU_INTR_MASK, NULL); if (IS_ERR(intr_mask)) { dev_err(&pdev->dev, "unable to acquire shared memory interrupt mask\n"); - return PTR_ERR(intr_mask); + ret = PTR_ERR(intr_mask); + goto out_put; } /* Setup the reference to the local state bits */ @@ -550,7 +552,8 @@ static int qcom_smsm_probe(struct platform_device *pdev) smsm->state = qcom_smem_state_register(local_node, &smsm_state_ops, smsm); if 
(IS_ERR(smsm->state)) { dev_err(smsm->dev, "failed to register qcom_smem_state\n"); - return PTR_ERR(smsm->state); + ret = PTR_ERR(smsm->state); + goto out_put; } /* Register handlers for remote processor entries of interest. */ @@ -580,16 +583,19 @@ static int qcom_smsm_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, smsm); + of_node_put(local_node); return 0; unwind_interfaces: + of_node_put(node); for (id = 0; id < smsm->num_entries; id++) if (smsm->entries[id].domain) irq_domain_remove(smsm->entries[id].domain); qcom_smem_state_unregister(smsm->state); - +out_put: + of_node_put(local_node); return ret; } From d5c2051898fdaf57ea36eb6fde74190c73f977fa Mon Sep 17 00:00:00 2001 From: Liang He Date: Thu, 21 Jul 2022 21:52:17 +0800 Subject: [PATCH 0174/1477] soc: qcom: smem_state: Add refcounting for the 'state->of_node' [ Upstream commit 90681f53b9381c23ff7762a3b13826d620c272de ] In qcom_smem_state_register() and qcom_smem_state_release(), we should better use of_node_get() and of_node_put() for the reference creation and destruction of 'device_node'. 
Fixes: 9460ae2ff308 ("soc: qcom: Introduce common SMEM state machine code") Signed-off-by: Liang He Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220721135217.1301039-2-windhl@126.com Signed-off-by: Sasha Levin --- drivers/soc/qcom/smem_state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/qcom/smem_state.c b/drivers/soc/qcom/smem_state.c index d2b558438deb..41e929407196 100644 --- a/drivers/soc/qcom/smem_state.c +++ b/drivers/soc/qcom/smem_state.c @@ -136,6 +136,7 @@ static void qcom_smem_state_release(struct kref *ref) struct qcom_smem_state *state = container_of(ref, struct qcom_smem_state, refcount); list_del(&state->list); + of_node_put(state->of_node); kfree(state); } @@ -169,7 +170,7 @@ struct qcom_smem_state *qcom_smem_state_register(struct device_node *of_node, kref_init(&state->refcount); - state->of_node = of_node; + state->of_node = of_node_get(of_node); state->ops = *ops; state->priv = priv; From 43faaedf3a7f3d4836f72a5957f2b4bd14a6a553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Wed, 27 Jul 2022 14:56:10 +0200 Subject: [PATCH 0175/1477] ARM: dts: turris-omnia: Fix mpp26 pin name and comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 49e93898f0dc177e645c22d0664813567fd9ec00 ] There is a bug in Turris Omnia's schematics, whereupon the MPP[26] pin, which is routed to CN11 pin header, is documented as SPI CS1, but MPP[26] pin does not support this function. Instead it controls chip select 2 if in "spi0" mode. Fix the name of the pin node in pinctrl node and fix the comment in SPI node. 
Fixes: 26ca8b52d6e1 ("ARM: dts: add support for Turris Omnia") Signed-off-by: Marek Behún Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- arch/arm/boot/dts/armada-385-turris-omnia.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/armada-385-turris-omnia.dts b/arch/arm/boot/dts/armada-385-turris-omnia.dts index fde4c302f08e..92e08486ec81 100644 --- a/arch/arm/boot/dts/armada-385-turris-omnia.dts +++ b/arch/arm/boot/dts/armada-385-turris-omnia.dts @@ -307,7 +307,7 @@ marvell,function = "spi0"; }; - spi0cs1_pins: spi0cs1-pins { + spi0cs2_pins: spi0cs2-pins { marvell,pins = "mpp26"; marvell,function = "spi0"; }; @@ -342,7 +342,7 @@ }; }; - /* MISO, MOSI, SCLK and CS1 are routed to pin header CN11 */ + /* MISO, MOSI, SCLK and CS2 are routed to pin header CN11 */ }; &uart0 { From df4f05b35634839b070b4171411880fcb4258b94 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 16 Aug 2022 02:10:24 +0200 Subject: [PATCH 0176/1477] ARM: dts: kirkwood: lsxl: fix serial line [ Upstream commit 04eabc6ac10fda9424606d9a7ab6ab9a5d95350a ] Commit 327e15428977 ("ARM: dts: kirkwood: consolidate common pinctrl settings") unknowingly broke the serial output on this board. Before this commit, the pinmux was still configured by the bootloader and the kernel didn't reconfigure it again. This was an oversight by the initial board support where the pinmux for the serial line was never configured by the kernel. But with this commit, the serial line will be reconfigured to the wrong pins. This is especially confusing, because the output still works, but the input doesn't. Presumably, the input is reconfigured to MPP10, but the output is connected to both MPP11 and MPP5. Override the pinmux in the board device tree.
Fixes: 327e15428977 ("ARM: dts: kirkwood: consolidate common pinctrl settings") Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- arch/arm/boot/dts/kirkwood-lsxl.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/kirkwood-lsxl.dtsi b/arch/arm/boot/dts/kirkwood-lsxl.dtsi index 7b151acb9984..321a40a98ed2 100644 --- a/arch/arm/boot/dts/kirkwood-lsxl.dtsi +++ b/arch/arm/boot/dts/kirkwood-lsxl.dtsi @@ -10,6 +10,11 @@ ocp@f1000000 { pinctrl: pin-controller@10000 { + /* Non-default UART pins */ + pmx_uart0: pmx-uart0 { + marvell,pins = "mpp4", "mpp5"; + }; + pmx_power_hdd: pmx-power-hdd { marvell,pins = "mpp10"; marvell,function = "gpo"; From e31c0e14cfad8a621832def190f63b72a701cfff Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 16 Aug 2022 02:10:25 +0200 Subject: [PATCH 0177/1477] ARM: dts: kirkwood: lsxl: remove first ethernet port [ Upstream commit 2d528eda7c96ce5c70f895854ecd5684bd5d80b9 ] Both the Linkstation LS-CHLv2 and the LS-XHL have only one ethernet port. This has always been wrong, i.e. the board code used to set up both ports, but the driver will play nice and return -ENODEV if the associated PHY is not found. Nevertheless, it is wrong. Remove it.
Fixes: 876e23333511 ("ARM: kirkwood: add gigabit ethernet and mvmdio device tree nodes") Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- arch/arm/boot/dts/kirkwood-lsxl.dtsi | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/arm/boot/dts/kirkwood-lsxl.dtsi b/arch/arm/boot/dts/kirkwood-lsxl.dtsi index 321a40a98ed2..88b70ba1c8fe 100644 --- a/arch/arm/boot/dts/kirkwood-lsxl.dtsi +++ b/arch/arm/boot/dts/kirkwood-lsxl.dtsi @@ -218,22 +218,11 @@ &mdio { status = "okay"; - ethphy0: ethernet-phy@0 { - reg = <0>; - }; - ethphy1: ethernet-phy@8 { reg = <8>; }; }; -&eth0 { - status = "okay"; - ethernet0-port@0 { - phy-handle = <&ethphy0>; - }; -}; - &eth1 { status = "okay"; ethernet1-port@0 { From 09c35f1520e773c89cc32edb7abcfbcf3f4f4c9b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 10 Sep 2022 18:26:16 -0700 Subject: [PATCH 0178/1477] ia64: export memory_add_physaddr_to_nid to fix cxl build error [ Upstream commit 97c318bfbe84efded246e80428054f300042f110 ] cxl_pmem.ko uses memory_add_physaddr_to_nid() but ia64 does not export it, so this causes a build error: ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/cxl/cxl_pmem.ko] undefined! Fix this by exporting that function.
Fixes: 8c2676a5870a ("hot-add-mem x86_64: memory_add_physaddr_to_nid node fixup") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Cc: Dan Williams Cc: Ben Widawsky Cc: Jonathan Cameron Cc: linux-ia64@vger.kernel.org Cc: Arnd Bergmann Cc: Keith Mannthey Cc: Andrew Morton Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/ia64/mm/numa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index f34964271101..6cd002e8163d 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -106,5 +106,6 @@ int memory_add_physaddr_to_nid(u64 addr) return 0; return nid; } +EXPORT_SYMBOL(memory_add_physaddr_to_nid); #endif #endif From 5bbd3dd7f92349cd836e64186dce7539348d88ac Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 23 Sep 2020 03:34:21 +0300 Subject: [PATCH 0179/1477] soc/tegra: fuse: Drop Kconfig dependency on TEGRA20_APB_DMA [ Upstream commit 2254182807fc09ba9dec9a42ef239e373796f1b2 ] The DMA subsystem could be entirely disabled in Kconfig and then the TEGRA20_APB_DMA option isn't available too. Hence kernel configuration fails if DMADEVICES Kconfig option is disabled due to the unsatisfiable dependency. The FUSE driver isn't a critical driver and currently it only provides NVMEM interface to userspace which isn't known to be widely used, and thus, it's fine if FUSE driver fails to load. Let's remove the erroneous Kconfig dependency and let the FUSE driver to fail the probing if DMA is unavailable. 
Fixes: 19d41e5e9c68 ("soc/tegra: fuse: Add APB DMA dependency for Tegra20") Reported-by: Necip Fazil Yildiran Link: https://bugzilla.kernel.org/show_bug.cgi?id=209301 Signed-off-by: Dmitry Osipenko Signed-off-by: Sasha Levin --- drivers/soc/tegra/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig index 976dee036470..676807c5a215 100644 --- a/drivers/soc/tegra/Kconfig +++ b/drivers/soc/tegra/Kconfig @@ -136,7 +136,6 @@ config SOC_TEGRA_FUSE def_bool y depends on ARCH_TEGRA select SOC_BUS - select TEGRA20_APB_DMA if ARCH_TEGRA_2x_SOC config SOC_TEGRA_FLOWCTRL bool From 477dbf9d1bd553bb5df842ce0395cb98a4a4363f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 26 Sep 2022 12:43:53 +0200 Subject: [PATCH 0180/1477] ARM: dts: exynos: correct s5k6a3 reset polarity on Midas family [ Upstream commit 3ba2d4bb9592bf7a6a3fe3dbe711ecfc3d004bab ] According to s5k6a3 driver code, the reset line for the chip appears to be active low. This also matches the typical polarity of reset lines in general. Let's fix it up as having correct polarity in DTS is important when the driver will be switched over to gpiod API. 
Fixes: b4fec64758ab ("ARM: dts: Add camera device nodes for Exynos4412 TRATS2 board") Signed-off-by: Dmitry Torokhov Signed-off-by: Krzysztof Kozlowski Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20220913164104.203957-1-dmitry.torokhov@gmail.com Link: https://lore.kernel.org/r/20220926104354.118578-2-krzysztof.kozlowski@linaro.org Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/boot/dts/exynos4412-midas.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos4412-midas.dtsi b/arch/arm/boot/dts/exynos4412-midas.dtsi index 06450066b178..255a13666edc 100644 --- a/arch/arm/boot/dts/exynos4412-midas.dtsi +++ b/arch/arm/boot/dts/exynos4412-midas.dtsi @@ -588,7 +588,7 @@ clocks = <&camera 1>; clock-names = "extclk"; samsung,camclk-out = <1>; - gpios = <&gpm1 6 GPIO_ACTIVE_HIGH>; + gpios = <&gpm1 6 GPIO_ACTIVE_LOW>; port { is_s5k6a3_ep: endpoint { From 40e966a404c74f44b070e6e82da97a1ba2043fbb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 27 Sep 2022 15:28:26 +0200 Subject: [PATCH 0181/1477] ARM: Drop CMDLINE_* dependency on ATAGS [ Upstream commit 136f4b1ec7c962ee37a787e095fd37b058d72bd3 ] On arm32, the configuration options to specify the kernel command line type depend on ATAGS. However, the actual CMDLINE configuration option does not depend on ATAGS, and the code that handles this is not specific to ATAGS (see drivers/of/fdt.c:early_init_dt_scan_chosen()). Hence users who desire to override the kernel command line on arm32 must enable support for ATAGS, even on a pure-DT system. Other architectures (arm64, loongarch, microblaze, nios2, powerpc, and riscv) do not impose such a restriction. Hence drop the dependency on ATAGS.
Fixes: bd51e2f595580fb6 ("ARM: 7506/1: allow for ATAGS to be configured out when DT support is selected") Signed-off-by: Geert Uytterhoeven Acked-by: Ard Biesheuvel Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b587ecc6f949..985ab0b091a6 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1791,7 +1791,6 @@ config CMDLINE choice prompt "Kernel command line type" if CMDLINE != "" default CMDLINE_FROM_BOOTLOADER - depends on ATAGS config CMDLINE_FROM_BOOTLOADER bool "Use bootloader kernel arguments if available" From 657de36c72f57fa172a66b06f826b3f5bc56f42e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 29 Sep 2022 14:45:25 +0100 Subject: [PATCH 0182/1477] arm64: ftrace: fix module PLTs with mcount [ Upstream commit 8cfb08575c6d4585f1ce0deeb189e5c824776b04 ] Li Huafei reports that mcount-based ftrace with module PLTs was broken by commit: a6253579977e4c6f ("arm64: ftrace: consistently handle PLTs.") When a module PLTs are used and a module is loaded sufficiently far away from the kernel, we'll create PLTs for any branches which are out-of-range. These are separate from the special ftrace trampoline PLTs, which the module PLT code doesn't directly manipulate. When mcount is in use this is a problem, as each mcount callsite in a module will be initialized to point to a module PLT, but since commit a6253579977e4c6f ftrace_make_nop() will assume that the callsite has been initialized to point to the special ftrace trampoline PLT, and ftrace_find_callable_addr() rejects other cases. This means that when ftrace tries to initialize a callsite via ftrace_make_nop(), the call to ftrace_find_callable_addr() will find that the `_mcount` stub is out-of-range and is not handled by the ftrace PLT, resulting in a splat: | ftrace_test: loading out-of-tree module taints kernel. 
| ftrace: no module PLT for _mcount | ------------[ ftrace bug ]------------ | ftrace failed to modify | [] 0xffff800029180014 | actual: 44:00:00:94 | Initializing ftrace call sites | ftrace record flags: 2000000 | (0) | expected tramp: ffff80000802eb3c | ------------[ cut here ]------------ | WARNING: CPU: 3 PID: 157 at kernel/trace/ftrace.c:2120 ftrace_bug+0x94/0x270 | Modules linked in: | CPU: 3 PID: 157 Comm: insmod Tainted: G O 6.0.0-rc6-00151-gcd722513a189-dirty #22 | Hardware name: linux,dummy-virt (DT) | pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : ftrace_bug+0x94/0x270 | lr : ftrace_bug+0x21c/0x270 | sp : ffff80000b2bbaf0 | x29: ffff80000b2bbaf0 x28: 0000000000000000 x27: ffff0000c4d38000 | x26: 0000000000000001 x25: ffff800009d7e000 x24: ffff0000c4d86e00 | x23: 0000000002000000 x22: ffff80000a62b000 x21: ffff8000098ebea8 | x20: ffff0000c4d38000 x19: ffff80000aa24158 x18: ffffffffffffffff | x17: 0000000000000000 x16: 0a0d2d2d2d2d2d2d x15: ffff800009aa9118 | x14: 0000000000000000 x13: 6333626532303830 x12: 3030303866666666 | x11: 203a706d61727420 x10: 6465746365707865 x9 : 3362653230383030 | x8 : c0000000ffffefff x7 : 0000000000017fe8 x6 : 000000000000bff4 | x5 : 0000000000057fa8 x4 : 0000000000000000 x3 : 0000000000000001 | x2 : ad2cb14bb5438900 x1 : 0000000000000000 x0 : 0000000000000022 | Call trace: | ftrace_bug+0x94/0x270 | ftrace_process_locs+0x308/0x430 | ftrace_module_init+0x44/0x60 | load_module+0x15b4/0x1ce8 | __do_sys_init_module+0x1ec/0x238 | __arm64_sys_init_module+0x24/0x30 | invoke_syscall+0x54/0x118 | el0_svc_common.constprop.4+0x84/0x100 | do_el0_svc+0x3c/0xd0 | el0_svc+0x1c/0x50 | el0t_64_sync_handler+0x90/0xb8 | el0t_64_sync+0x15c/0x160 | ---[ end trace 0000000000000000 ]--- | ---------test_init----------- Fix this by reverting to the old behaviour of ignoring the old instruction when initialising an mcount callsite in a module, which was the behaviour prior to commit a6253579977e4c6f. 
Signed-off-by: Mark Rutland Fixes: a6253579977e ("arm64: ftrace: consistently handle PLTs.") Reported-by: Li Huafei Link: https://lore.kernel.org/linux-arm-kernel/20220929094134.99512-1-lihuafei1@huawei.com Cc: Ard Biesheuvel Cc: Will Deacon Link: https://lore.kernel.org/r/20220929134525.798593-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/ftrace.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 3724bab278b2..402a24f845b9 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -216,11 +216,26 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long pc = rec->ip; u32 old = 0, new; + new = aarch64_insn_gen_nop(); + + /* + * When using mcount, callsites in modules may have been initalized to + * call an arbitrary module PLT (which redirects to the _mcount stub) + * rather than the ftrace PLT we'll use at runtime (which redirects to + * the ftrace trampoline). We can ignore the old PLT when initializing + * the callsite. + * + * Note: 'mod' is only set at module load time. 
+ */ + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && + IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) { + return aarch64_insn_patch_text_nosync((void *)pc, new); + } + if (!ftrace_find_callable_addr(rec, mod, &addr)) return -EINVAL; old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); - new = aarch64_insn_gen_nop(); return ftrace_modify_code(pc, old, new, true); } From 5b9bb0cbd9e7b69ed5416a28aa338acf2fd050d5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 27 Sep 2022 15:05:03 -0700 Subject: [PATCH 0183/1477] ARM: dts: exynos: fix polarity of VBUS GPIO of Origen [ Upstream commit a08137bd1e0a7ce951dce9ce4a83e39d379b6e1b ] EHCI Exynos (drivers/usb/host/ehci-exynos.c) drives VBUS GPIO high when trying to power up the bus, therefore the GPIO in DTS must be marked as "active high". This will be important when EHCI driver is converted to gpiod API that respects declared polarities. Fixes: 4e8991def565 ("ARM: dts: exynos: Enable AX88760 USB hub on Origen board") Signed-off-by: Dmitry Torokhov Link: https://lore.kernel.org/r/20220927220504.3744878-1-dmitry.torokhov@gmail.com Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm/boot/dts/exynos4412-origen.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos4412-origen.dts b/arch/arm/boot/dts/exynos4412-origen.dts index c2e793b69e7d..e2d76ea4404e 100644 --- a/arch/arm/boot/dts/exynos4412-origen.dts +++ b/arch/arm/boot/dts/exynos4412-origen.dts @@ -95,7 +95,7 @@ }; &ehci { - samsung,vbus-gpio = <&gpx3 5 1>; + samsung,vbus-gpio = <&gpx3 5 GPIO_ACTIVE_HIGH>; status = "okay"; phys = <&exynos_usbphy 2>, <&exynos_usbphy 3>; phy-names = "hsic0", "hsic1"; From bc2b97e177a953d7317d8ccd44ff18f84a1429ec Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 3 Aug 2022 13:28:37 +0300 Subject: [PATCH 0184/1477] iio: adc: at91-sama5d2_adc: fix AT91_SAMA5D2_MR_TRACKTIM_MAX [ Upstream commit bb73d5d9164c57c4bb916739a98e5cd8e0a5ed8c ] All ADC HW
versions handled by this driver (SAMA5D2, SAM9X60, SAMA7G5) have MR.TRACKTIM on 4 bits. Fix AT91_SAMA5D2_MR_TRACKTIM_MAX to reflect this. Fixes: 27e177190891 ("iio:adc:at91_adc8xx: introduce new atmel adc driver") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220803102855.2191070-2-claudiu.beznea@microchip.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/at91-sama5d2_adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 4ede7e766765..230e4111517e 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -74,7 +74,7 @@ #define AT91_SAMA5D2_MR_ANACH BIT(23) /* Tracking Time */ #define AT91_SAMA5D2_MR_TRACKTIM(v) ((v) << 24) -#define AT91_SAMA5D2_MR_TRACKTIM_MAX 0xff +#define AT91_SAMA5D2_MR_TRACKTIM_MAX 0xf /* Transfer Time */ #define AT91_SAMA5D2_MR_TRANSFER(v) ((v) << 28) #define AT91_SAMA5D2_MR_TRANSFER_MAX 0x3 From d259b90f0c3d38700c5ad4ab8031cdf3a43215b2 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 3 Aug 2022 13:28:38 +0300 Subject: [PATCH 0185/1477] iio: adc: at91-sama5d2_adc: check return status for pressure and touch [ Upstream commit d84ace944a3b24529798dbae1340dea098473155 ] Check return status of at91_adc_read_position() and at91_adc_read_pressure() in at91_adc_read_info_raw(). 
Fixes: 6794e23fa3fe ("iio: adc: at91-sama5d2_adc: add support for oversampling resolution") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220803102855.2191070-3-claudiu.beznea@microchip.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/at91-sama5d2_adc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 230e4111517e..fe41689c5da6 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -1355,8 +1355,10 @@ static int at91_adc_read_info_raw(struct iio_dev *indio_dev, *val = tmp_val; mutex_unlock(&st->lock); iio_device_release_direct_mode(indio_dev); + if (ret > 0) + ret = at91_adc_adjust_val_osr(st, val); - return at91_adc_adjust_val_osr(st, val); + return ret; } if (chan->type == IIO_PRESSURE) { ret = iio_device_claim_direct_mode(indio_dev); @@ -1369,8 +1371,10 @@ static int at91_adc_read_info_raw(struct iio_dev *indio_dev, *val = tmp_val; mutex_unlock(&st->lock); iio_device_release_direct_mode(indio_dev); + if (ret > 0) + ret = at91_adc_adjust_val_osr(st, val); - return at91_adc_adjust_val_osr(st, val); + return ret; } /* in this case we have a voltage channel */ From 513c72d76df653ab8a15750c3f520a39b514b2dd Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 3 Aug 2022 13:28:39 +0300 Subject: [PATCH 0186/1477] iio: adc: at91-sama5d2_adc: lock around oversampling and sample freq [ Upstream commit 9780a23ed5a0a0a63683e078f576719a98d4fb70 ] .read_raw()/.write_raw() could be called asynchronously from user space or other in kernel drivers. Without locking on st->lock these could be called asynchronously while there is a conversion in progress. Read will be harmless but changing registers while conversion is in progress may lead to inconsistent results. Thus, to avoid this lock st->lock. 
Fixes: 27e177190891 ("iio:adc:at91_adc8xx: introduce new atmel adc driver") Fixes: 6794e23fa3fe ("iio: adc: at91-sama5d2_adc: add support for oversampling resolution") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220803102855.2191070-4-claudiu.beznea@microchip.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/at91-sama5d2_adc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index fe41689c5da6..ef6dc85024c1 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -1353,10 +1353,10 @@ static int at91_adc_read_info_raw(struct iio_dev *indio_dev, ret = at91_adc_read_position(st, chan->channel, &tmp_val); *val = tmp_val; - mutex_unlock(&st->lock); - iio_device_release_direct_mode(indio_dev); if (ret > 0) ret = at91_adc_adjust_val_osr(st, val); + mutex_unlock(&st->lock); + iio_device_release_direct_mode(indio_dev); return ret; } @@ -1369,10 +1369,10 @@ static int at91_adc_read_info_raw(struct iio_dev *indio_dev, ret = at91_adc_read_pressure(st, chan->channel, &tmp_val); *val = tmp_val; - mutex_unlock(&st->lock); - iio_device_release_direct_mode(indio_dev); if (ret > 0) ret = at91_adc_adjust_val_osr(st, val); + mutex_unlock(&st->lock); + iio_device_release_direct_mode(indio_dev); return ret; } @@ -1465,16 +1465,20 @@ static int at91_adc_write_raw(struct iio_dev *indio_dev, /* if no change, optimize out */ if (val == st->oversampling_ratio) return 0; + mutex_lock(&st->lock); st->oversampling_ratio = val; /* update ratio */ at91_adc_config_emr(st); + mutex_unlock(&st->lock); return 0; case IIO_CHAN_INFO_SAMP_FREQ: if (val < st->soc_info.min_sample_rate || val > st->soc_info.max_sample_rate) return -EINVAL; + mutex_lock(&st->lock); at91_adc_setup_samp_freq(indio_dev, val); + mutex_unlock(&st->lock); return 0; default: return -EINVAL; From 44ec4b04fc9950678e266f1ad07a4868120fcb7b Mon Sep 
17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 3 Aug 2022 13:28:40 +0300 Subject: [PATCH 0187/1477] iio: adc: at91-sama5d2_adc: disable/prepare buffer on suspend/resume [ Upstream commit 808175e21d9b7f866eda742e8970f27b78afe5db ] In case triggered buffers are enabled while system is suspended they will not work anymore after resume. For this call at91_adc_buffer_postdisable() on suspend and at91_adc_buffer_prepare() on resume. On tests it has been seen that at91_adc_buffer_postdisable() call is not necessary but it has been kept because it also does the book keeping for DMA. On resume path there is no need to call at91_adc_configure_touch() as it is embedded in at91_adc_buffer_prepare(). Fixes: 073c662017f2f ("iio: adc: at91-sama5d2_adc: add support for DMA") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220803102855.2191070-5-claudiu.beznea@microchip.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/at91-sama5d2_adc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index ef6dc85024c1..250b78ee1625 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -1907,6 +1907,9 @@ static __maybe_unused int at91_adc_suspend(struct device *dev) struct iio_dev *indio_dev = dev_get_drvdata(dev); struct at91_adc_state *st = iio_priv(indio_dev); + if (iio_buffer_enabled(indio_dev)) + at91_adc_buffer_postdisable(indio_dev); + /* * Do a sofware reset of the ADC before we go to suspend. 
* this will ensure that all pins are free from being muxed by the ADC @@ -1950,14 +1953,11 @@ static __maybe_unused int at91_adc_resume(struct device *dev) if (!iio_buffer_enabled(indio_dev)) return 0; - /* check if we are enabling triggered buffer or the touchscreen */ - if (at91_adc_current_chan_is_touch(indio_dev)) - return at91_adc_configure_touch(st, true); - else - return at91_adc_configure_trigger(st->trig, true); + ret = at91_adc_buffer_prepare(indio_dev); + if (ret) + goto vref_disable_resume; - /* not needed but more explicit */ - return 0; + return at91_adc_configure_trigger(st->trig, true); vref_disable_resume: regulator_disable(st->vref); From b9a0526cd02bccf98d38cd428c451acc40539989 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Fri, 15 Jul 2022 14:28:49 +0200 Subject: [PATCH 0188/1477] iio: inkern: only release the device node when done with it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 79c3e84874c7d14f04ad58313b64955a0d2e9437 ] 'of_node_put()' can potentially release the memory pointed to by 'iiospec.np' which would leave us with an invalid pointer (and we would still pass it in 'of_xlate()'). Note that it is not guaranteed for the of_node lifespan to be attached to the device (to which is attached) lifespan so that there is (even though very unlikely) the possibility for the node to be freed while the device is still around. Thus, as there are indeed some of_xlate users which do access the node, a race is indeed possible. As such, we can only release the node after we are done with it. 
Fixes: 17d82b47a215d ("iio: Add OF support") Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20220715122903.332535-2-nuno.sa@analog.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/inkern.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 8c3faa797284..c32b2577dd99 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -136,9 +136,10 @@ static int __of_iio_channel_get(struct iio_channel *channel, idev = bus_find_device(&iio_bus_type, NULL, iiospec.np, iio_dev_node_match); - of_node_put(iiospec.np); - if (idev == NULL) + if (idev == NULL) { + of_node_put(iiospec.np); return -EPROBE_DEFER; + } indio_dev = dev_to_iio_dev(idev); channel->indio_dev = indio_dev; @@ -146,6 +147,7 @@ static int __of_iio_channel_get(struct iio_channel *channel, index = indio_dev->info->of_xlate(indio_dev, &iiospec); else index = __of_iio_simple_xlate(indio_dev, &iiospec); + of_node_put(iiospec.np); if (index < 0) goto err_put; channel->channel = &indio_dev->channels[index]; From aa7aada4b7b853303c5658b4b04b641bc9d64fa9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 26 Jun 2022 13:29:23 +0100 Subject: [PATCH 0189/1477] iio: ABI: Fix wrong format of differential capacitance channel ABI. [ Upstream commit 1efc41035f1841acf0af2bab153158e27ce94f10 ] in_ only occurs once in these attributes. 
Fixes: 0baf29d658c7 ("staging:iio:documentation Add abi docs for capacitance adcs.") Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220626122938.582107-3-jic23@kernel.org Signed-off-by: Sasha Levin --- Documentation/ABI/testing/sysfs-bus-iio | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index df42bed09f25..53f07fc41b96 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -142,7 +142,7 @@ Description: Raw capacitance measurement from channel Y. Units after application of scale and offset are nanofarads. -What: /sys/.../iio:deviceX/in_capacitanceY-in_capacitanceZ_raw +What: /sys/.../iio:deviceX/in_capacitanceY-capacitanceZ_raw KernelVersion: 3.2 Contact: linux-iio@vger.kernel.org Description: From ffffb159e1e52907e3b4a11de7c78ae3bcd3f247 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 13 Jan 2021 18:52:46 -0800 Subject: [PATCH 0190/1477] usb: ch9: Add USB 3.2 SSP attributes [ Upstream commit f2fc9ff28d1c9bef7760516feadd38164044caae ] In preparation for USB 3.2 dual-lane support, add sublink speed attribute macros and enum usb_ssp_rate. A USB device that operates in SuperSpeed Plus may operate at different speed and lane count. These additional macros and enum values help specifying that. 
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/ae9293ebd63a29f2a2035054753534d9eb123d74.1610592135.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: b6155eaf6b05 ("usb: common: debug: Check non-standard control requests") Signed-off-by: Sasha Levin --- include/linux/usb/ch9.h | 9 +++++++++ include/uapi/linux/usb/ch9.h | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 604c6c514a50..86c50907634e 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -36,6 +36,15 @@ #include <linux/device.h> #include <uapi/linux/usb/ch9.h> +/* USB 3.2 SuperSpeed Plus phy signaling rate generation and lane count */ + +enum usb_ssp_rate { + USB_SSP_GEN_UNKNOWN = 0, + USB_SSP_GEN_2x1, + USB_SSP_GEN_1x2, + USB_SSP_GEN_2x2, +}; + /** * usb_ep_type_string() - Returns human readable-name of the endpoint type. * @ep_type: The endpoint type to return human-readable name for. If it's not diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 0f865ae4ba89..17ce56198c9a 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -968,9 +968,22 @@ struct usb_ssp_cap_descriptor { __le32 bmSublinkSpeedAttr[1]; /* list of sublink speed attrib entries */ #define USB_SSP_SUBLINK_SPEED_SSID (0xf) /* sublink speed ID */ #define USB_SSP_SUBLINK_SPEED_LSE (0x3 << 4) /* Lanespeed exponent */ +#define USB_SSP_SUBLINK_SPEED_LSE_BPS 0 +#define USB_SSP_SUBLINK_SPEED_LSE_KBPS 1 +#define USB_SSP_SUBLINK_SPEED_LSE_MBPS 2 +#define USB_SSP_SUBLINK_SPEED_LSE_GBPS 3 + #define USB_SSP_SUBLINK_SPEED_ST (0x3 << 6) /* Sublink type */ +#define USB_SSP_SUBLINK_SPEED_ST_SYM_RX 0 +#define USB_SSP_SUBLINK_SPEED_ST_ASYM_RX 1 +#define USB_SSP_SUBLINK_SPEED_ST_SYM_TX 2 +#define USB_SSP_SUBLINK_SPEED_ST_ASYM_TX 3 + #define USB_SSP_SUBLINK_SPEED_RSVD (0x3f << 8) /* Reserved */ #define USB_SSP_SUBLINK_SPEED_LP (0x3 << 14) /* Link protocol */ +#define USB_SSP_SUBLINK_SPEED_LP_SS 0 +#define
USB_SSP_SUBLINK_SPEED_LP_SSP 1 + #define USB_SSP_SUBLINK_SPEED_LSM (0xff << 16) /* Lanespeed mantissa */ } __attribute__((packed)); From c1ef8c66a362979f5c48dd01b808ccecd5497fe4 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 19 Jan 2021 17:36:14 -0800 Subject: [PATCH 0191/1477] usb: common: Parse for USB SSP genXxY [ Upstream commit 52c2d15703c3a900d5f78cd599b823db40d5100b ] The USB "maximum-speed" property can now take the SSP signaling rate generation and lane count with these new strings: "super-speed-plus-gen2x2" "super-speed-plus-gen2x1" "super-speed-plus-gen1x2" Introduce usb_get_maximum_ssp_rate() to parse for the corresponding usb_ssp_rate enum. The original usb_get_maximum_speed() will return USB_SPEED_SUPER_PLUS if it matches one of these new strings. Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/f8ed896313d8cd8e2d2b540fc82db92b3ddf8a47.1611106162.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: b6155eaf6b05 ("usb: common: debug: Check non-standard control requests") Signed-off-by: Sasha Levin --- drivers/usb/common/common.c | 26 +++++++++++++++++++++++++- include/linux/usb/ch9.h | 11 +++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/usb/common/common.c b/drivers/usb/common/common.c index 1433260d99b4..fc21cf2d36f6 100644 --- a/drivers/usb/common/common.c +++ b/drivers/usb/common/common.c @@ -69,6 +69,13 @@ static const char *const speed_names[] = { [USB_SPEED_SUPER_PLUS] = "super-speed-plus", }; +static const char *const ssp_rate[] = { + [USB_SSP_GEN_UNKNOWN] = "UNKNOWN", + [USB_SSP_GEN_2x1] = "super-speed-plus-gen2x1", + [USB_SSP_GEN_1x2] = "super-speed-plus-gen1x2", + [USB_SSP_GEN_2x2] = "super-speed-plus-gen2x2", +}; + const char *usb_speed_string(enum usb_device_speed speed) { if (speed < 0 || speed >= ARRAY_SIZE(speed_names)) @@ -86,12 +93,29 @@ enum usb_device_speed usb_get_maximum_speed(struct device *dev) if (ret < 0) return USB_SPEED_UNKNOWN; - ret = 
match_string(speed_names, ARRAY_SIZE(speed_names), maximum_speed); + ret = match_string(ssp_rate, ARRAY_SIZE(ssp_rate), maximum_speed); + if (ret > 0) + return USB_SPEED_SUPER_PLUS; + ret = match_string(speed_names, ARRAY_SIZE(speed_names), maximum_speed); return (ret < 0) ? USB_SPEED_UNKNOWN : ret; } EXPORT_SYMBOL_GPL(usb_get_maximum_speed); +enum usb_ssp_rate usb_get_maximum_ssp_rate(struct device *dev) +{ + const char *maximum_speed; + int ret; + + ret = device_property_read_string(dev, "maximum-speed", &maximum_speed); + if (ret < 0) + return USB_SSP_GEN_UNKNOWN; + + ret = match_string(ssp_rate, ARRAY_SIZE(ssp_rate), maximum_speed); + return (ret < 0) ? USB_SSP_GEN_UNKNOWN : ret; +} +EXPORT_SYMBOL_GPL(usb_get_maximum_ssp_rate); + const char *usb_state_string(enum usb_device_state state) { static const char *const names[] = { diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 86c50907634e..abdd310c77f0 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -71,6 +71,17 @@ extern const char *usb_speed_string(enum usb_device_speed speed); */ extern enum usb_device_speed usb_get_maximum_speed(struct device *dev); +/** + * usb_get_maximum_ssp_rate - Get the signaling rate generation and lane count + * of a SuperSpeed Plus capable device. + * @dev: Pointer to the given USB controller device + * + * If the string from "maximum-speed" property is super-speed-plus-genXxY where + * 'X' is the generation number and 'Y' is the number of lanes, then this + * function returns the corresponding enum usb_ssp_rate. + */ +extern enum usb_ssp_rate usb_get_maximum_ssp_rate(struct device *dev); + /** * usb_state_string - Returns human readable name for the state. * @state: The state to return a human-readable name for. 
If it's not From 20b63631a38a94a7542b570cfee005ff4c0df911 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Mon, 8 Mar 2021 10:52:05 +0800 Subject: [PATCH 0192/1477] usb: common: add function to get interval expressed in us unit [ Upstream commit fb95c7cf5600b7b74412f27dfb39a1e13fd8a90d ] Add a new function to convert bInterval into the time expressed in 1us unit. Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/25c8a09b055f716c1e5bf11fea72c3418f844482.1615170625.git.chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: b6155eaf6b05 ("usb: common: debug: Check non-standard control requests") Signed-off-by: Sasha Levin --- drivers/usb/common/common.c | 41 +++++++++++++++++++++++++++++++++++++ drivers/usb/core/devices.c | 21 ++++--------------- drivers/usb/core/endpoint.c | 35 ++++--------------------------- include/linux/usb/ch9.h | 3 +++ 4 files changed, 52 insertions(+), 48 deletions(-) diff --git a/drivers/usb/common/common.c b/drivers/usb/common/common.c index fc21cf2d36f6..675e8a4e683a 100644 --- a/drivers/usb/common/common.c +++ b/drivers/usb/common/common.c @@ -165,6 +165,47 @@ enum usb_dr_mode usb_get_dr_mode(struct device *dev) } EXPORT_SYMBOL_GPL(usb_get_dr_mode); +/** + * usb_decode_interval - Decode bInterval into the time expressed in 1us unit + * @epd: The descriptor of the endpoint + * @speed: The speed that the endpoint works as + * + * Function returns the interval expressed in 1us unit for servicing + * endpoint for data transfers. 
+ */ +unsigned int usb_decode_interval(const struct usb_endpoint_descriptor *epd, + enum usb_device_speed speed) +{ + unsigned int interval = 0; + + switch (usb_endpoint_type(epd)) { + case USB_ENDPOINT_XFER_CONTROL: + /* uframes per NAK */ + if (speed == USB_SPEED_HIGH) + interval = epd->bInterval; + break; + case USB_ENDPOINT_XFER_ISOC: + interval = 1 << (epd->bInterval - 1); + break; + case USB_ENDPOINT_XFER_BULK: + /* uframes per NAK */ + if (speed == USB_SPEED_HIGH && usb_endpoint_dir_out(epd)) + interval = epd->bInterval; + break; + case USB_ENDPOINT_XFER_INT: + if (speed >= USB_SPEED_HIGH) + interval = 1 << (epd->bInterval - 1); + else + interval = epd->bInterval; + break; + } + + interval *= (speed >= USB_SPEED_HIGH) ? 125 : 1000; + + return interval; +} +EXPORT_SYMBOL_GPL(usb_decode_interval); + #ifdef CONFIG_OF /** * of_usb_get_dr_mode_by_phy - Get dual role mode for the controller device diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c index 1ef2de6e375a..d8b0041de612 100644 --- a/drivers/usb/core/devices.c +++ b/drivers/usb/core/devices.c @@ -157,38 +157,25 @@ static char *usb_dump_endpoint_descriptor(int speed, char *start, char *end, switch (usb_endpoint_type(desc)) { case USB_ENDPOINT_XFER_CONTROL: type = "Ctrl"; - if (speed == USB_SPEED_HIGH) /* uframes per NAK */ - interval = desc->bInterval; - else - interval = 0; dir = 'B'; /* ctrl is bidirectional */ break; case USB_ENDPOINT_XFER_ISOC: type = "Isoc"; - interval = 1 << (desc->bInterval - 1); break; case USB_ENDPOINT_XFER_BULK: type = "Bulk"; - if (speed == USB_SPEED_HIGH && dir == 'O') /* uframes per NAK */ - interval = desc->bInterval; - else - interval = 0; break; case USB_ENDPOINT_XFER_INT: type = "Int."; - if (speed == USB_SPEED_HIGH || speed >= USB_SPEED_SUPER) - interval = 1 << (desc->bInterval - 1); - else - interval = desc->bInterval; break; default: /* "can't happen" */ return start; } - interval *= (speed == USB_SPEED_HIGH || - speed >= USB_SPEED_SUPER) ? 
125 : 1000; - if (interval % 1000) + + interval = usb_decode_interval(desc, speed); + if (interval % 1000) { unit = 'u'; - else { + } else { unit = 'm'; interval /= 1000; } diff --git a/drivers/usb/core/endpoint.c b/drivers/usb/core/endpoint.c index 1c2c04079676..fc3341f2bb61 100644 --- a/drivers/usb/core/endpoint.c +++ b/drivers/usb/core/endpoint.c @@ -84,40 +84,13 @@ static ssize_t interval_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ep_device *ep = to_ep_device(dev); + unsigned int interval; char unit; - unsigned interval = 0; - unsigned in; - in = (ep->desc->bEndpointAddress & USB_DIR_IN); - - switch (usb_endpoint_type(ep->desc)) { - case USB_ENDPOINT_XFER_CONTROL: - if (ep->udev->speed == USB_SPEED_HIGH) - /* uframes per NAK */ - interval = ep->desc->bInterval; - break; - - case USB_ENDPOINT_XFER_ISOC: - interval = 1 << (ep->desc->bInterval - 1); - break; - - case USB_ENDPOINT_XFER_BULK: - if (ep->udev->speed == USB_SPEED_HIGH && !in) - /* uframes per NAK */ - interval = ep->desc->bInterval; - break; - - case USB_ENDPOINT_XFER_INT: - if (ep->udev->speed == USB_SPEED_HIGH) - interval = 1 << (ep->desc->bInterval - 1); - else - interval = ep->desc->bInterval; - break; - } - interval *= (ep->udev->speed == USB_SPEED_HIGH) ? 125 : 1000; - if (interval % 1000) + interval = usb_decode_interval(ep->desc, ep->udev->speed); + if (interval % 1000) { unit = 'u'; - else { + } else { unit = 'm'; interval /= 1000; } diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index abdd310c77f0..74debc824645 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -90,6 +90,9 @@ extern enum usb_ssp_rate usb_get_maximum_ssp_rate(struct device *dev); */ extern const char *usb_state_string(enum usb_device_state state); +unsigned int usb_decode_interval(const struct usb_endpoint_descriptor *epd, + enum usb_device_speed speed); + #ifdef CONFIG_TRACING /** * usb_decode_ctrl - Returns human readable representation of control request. 
From 9d965a22f65716dc452fd682a89c58db9235ec61 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Mon, 8 Mar 2021 10:52:07 +0800 Subject: [PATCH 0193/1477] usb: common: move function's kerneldoc next to its definition [ Upstream commit 365038f24b3e9d2b7c9e499f03f432040e28a35c ] Following a general rule, add the kerneldoc for a function next to its definition, but not next to its declaration in a header file. Suggested-by: Alan Stern Suggested-by: Greg Kroah-Hartman Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/c4d2e010ae2bf67cdfa0b55e6d1deb9339d9d3dc.1615170625.git.chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: b6155eaf6b05 ("usb: common: debug: Check non-standard control requests") Signed-off-by: Sasha Levin --- drivers/usb/common/common.c | 35 +++++++++++++++++++++ drivers/usb/common/debug.c | 22 +++++++++++-- include/linux/usb/ch9.h | 61 ------------------------------------- 3 files changed, 55 insertions(+), 63 deletions(-) diff --git a/drivers/usb/common/common.c b/drivers/usb/common/common.c index 675e8a4e683a..347fb3d3894a 100644 --- a/drivers/usb/common/common.c +++ b/drivers/usb/common/common.c @@ -25,6 +25,12 @@ static const char *const ep_type_names[] = { [USB_ENDPOINT_XFER_INT] = "intr", }; +/** + * usb_ep_type_string() - Returns human readable-name of the endpoint type. + * @ep_type: The endpoint type to return human-readable name for. If it's not + * any of the types: USB_ENDPOINT_XFER_{CONTROL, ISOC, BULK, INT}, + * usually got by usb_endpoint_type(), the string 'unknown' will be returned. + */ const char *usb_ep_type_string(int ep_type) { if (ep_type < 0 || ep_type >= ARRAY_SIZE(ep_type_names)) @@ -76,6 +82,12 @@ static const char *const ssp_rate[] = { [USB_SSP_GEN_2x2] = "super-speed-plus-gen2x2", }; +/** + * usb_speed_string() - Returns human readable-name of the speed. + * @speed: The speed to return human-readable name for.
If it's not + * any of the speeds defined in usb_device_speed enum, string for + * USB_SPEED_UNKNOWN will be returned. + */ const char *usb_speed_string(enum usb_device_speed speed) { if (speed < 0 || speed >= ARRAY_SIZE(speed_names)) @@ -84,6 +96,14 @@ const char *usb_speed_string(enum usb_device_speed speed) } EXPORT_SYMBOL_GPL(usb_speed_string); +/** + * usb_get_maximum_speed - Get maximum requested speed for a given USB + * controller. + * @dev: Pointer to the given USB controller device + * + * The function gets the maximum speed string from property "maximum-speed", + * and returns the corresponding enum usb_device_speed. + */ enum usb_device_speed usb_get_maximum_speed(struct device *dev) { const char *maximum_speed; @@ -102,6 +122,15 @@ enum usb_device_speed usb_get_maximum_speed(struct device *dev) } EXPORT_SYMBOL_GPL(usb_get_maximum_speed); +/** + * usb_get_maximum_ssp_rate - Get the signaling rate generation and lane count + * of a SuperSpeed Plus capable device. + * @dev: Pointer to the given USB controller device + * + * If the string from "maximum-speed" property is super-speed-plus-genXxY where + * 'X' is the generation number and 'Y' is the number of lanes, then this + * function returns the corresponding enum usb_ssp_rate. + */ enum usb_ssp_rate usb_get_maximum_ssp_rate(struct device *dev) { const char *maximum_speed; @@ -116,6 +145,12 @@ enum usb_ssp_rate usb_get_maximum_ssp_rate(struct device *dev) } EXPORT_SYMBOL_GPL(usb_get_maximum_ssp_rate); +/** + * usb_state_string - Returns human readable name for the state. + * @state: The state to return a human-readable name for. If it's not + * any of the states devices in usb_device_state_string enum, + * the string UNKNOWN will be returned. 
+ */ const char *usb_state_string(enum usb_device_state state) { static const char *const names[] = { diff --git a/drivers/usb/common/debug.c b/drivers/usb/common/debug.c index ba849c7bc5c7..a76a086b9c54 100644 --- a/drivers/usb/common/debug.c +++ b/drivers/usb/common/debug.c @@ -207,8 +207,26 @@ static void usb_decode_set_isoch_delay(__u8 wValue, char *str, size_t size) snprintf(str, size, "Set Isochronous Delay(Delay = %d ns)", wValue); } -/* - * usb_decode_ctrl - returns a string representation of ctrl request +/** + * usb_decode_ctrl - Returns human readable representation of control request. + * @str: buffer to return a human-readable representation of control request. + * This buffer should have about 200 bytes. + * @size: size of str buffer. + * @bRequestType: matches the USB bmRequestType field + * @bRequest: matches the USB bRequest field + * @wValue: matches the USB wValue field (CPU byte order) + * @wIndex: matches the USB wIndex field (CPU byte order) + * @wLength: matches the USB wLength field (CPU byte order) + * + * Function returns decoded, formatted and human-readable description of + * control request packet. + * + * The usage scenario for this is for tracepoints, so function as a return + * use the same value as in parameters. This approach allows to use this + * function in TP_printk + * + * Important: wValue, wIndex, wLength parameters before invoking this function + * should be processed by le16_to_cpu macro. */ const char *usb_decode_ctrl(char *str, size_t size, __u8 bRequestType, __u8 bRequest, __u16 wValue, __u16 wIndex, diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 74debc824645..1cffa34740b0 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -45,76 +45,15 @@ enum usb_ssp_rate { USB_SSP_GEN_2x2, }; -/** - * usb_ep_type_string() - Returns human readable-name of the endpoint type. - * @ep_type: The endpoint type to return human-readable name for. 
If it's not - * any of the types: USB_ENDPOINT_XFER_{CONTROL, ISOC, BULK, INT}, - * usually got by usb_endpoint_type(), the string 'unknown' will be returned. - */ extern const char *usb_ep_type_string(int ep_type); - -/** - * usb_speed_string() - Returns human readable-name of the speed. - * @speed: The speed to return human-readable name for. If it's not - * any of the speeds defined in usb_device_speed enum, string for - * USB_SPEED_UNKNOWN will be returned. - */ extern const char *usb_speed_string(enum usb_device_speed speed); - -/** - * usb_get_maximum_speed - Get maximum requested speed for a given USB - * controller. - * @dev: Pointer to the given USB controller device - * - * The function gets the maximum speed string from property "maximum-speed", - * and returns the corresponding enum usb_device_speed. - */ extern enum usb_device_speed usb_get_maximum_speed(struct device *dev); - -/** - * usb_get_maximum_ssp_rate - Get the signaling rate generation and lane count - * of a SuperSpeed Plus capable device. - * @dev: Pointer to the given USB controller device - * - * If the string from "maximum-speed" property is super-speed-plus-genXxY where - * 'X' is the generation number and 'Y' is the number of lanes, then this - * function returns the corresponding enum usb_ssp_rate. - */ extern enum usb_ssp_rate usb_get_maximum_ssp_rate(struct device *dev); - -/** - * usb_state_string - Returns human readable name for the state. - * @state: The state to return a human-readable name for. If it's not - * any of the states devices in usb_device_state_string enum, - * the string UNKNOWN will be returned. - */ extern const char *usb_state_string(enum usb_device_state state); - unsigned int usb_decode_interval(const struct usb_endpoint_descriptor *epd, enum usb_device_speed speed); #ifdef CONFIG_TRACING -/** - * usb_decode_ctrl - Returns human readable representation of control request. - * @str: buffer to return a human-readable representation of control request. 
- * This buffer should have about 200 bytes. - * @size: size of str buffer. - * @bRequestType: matches the USB bmRequestType field - * @bRequest: matches the USB bRequest field - * @wValue: matches the USB wValue field (CPU byte order) - * @wIndex: matches the USB wIndex field (CPU byte order) - * @wLength: matches the USB wLength field (CPU byte order) - * - * Function returns decoded, formatted and human-readable description of - * control request packet. - * - * The usage scenario for this is for tracepoints, so function as a return - * use the same value as in parameters. This approach allows to use this - * function in TP_printk - * - * Important: wValue, wIndex, wLength parameters before invoking this function - * should be processed by le16_to_cpu macro. - */ extern const char *usb_decode_ctrl(char *str, size_t size, __u8 bRequestType, __u8 bRequest, __u16 wValue, __u16 wIndex, __u16 wLength); From beec2f02555c7e9d47fd455f40d7bf7c46fb4b8d Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 27 Jul 2022 18:38:01 -0700 Subject: [PATCH 0194/1477] usb: common: debug: Check non-standard control requests [ Upstream commit b6155eaf6b05e558218b44b88a6cad03f15a586c ] Previously usb_decode_ctrl() only decodes standard control requests, but it was used for non-standard requests also. If it's non-standard or unknown standard bRequest, print the Setup data values. 
Fixes: af32423a2d86 ("usb: dwc3: trace: decode ctrl request") Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/8d6a30f2f2f953eff833a5bc5aac640a4cc2fc9f.1658971571.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/common/debug.c | 96 +++++++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 32 deletions(-) diff --git a/drivers/usb/common/debug.c b/drivers/usb/common/debug.c index a76a086b9c54..f0c0e8db7038 100644 --- a/drivers/usb/common/debug.c +++ b/drivers/usb/common/debug.c @@ -207,30 +207,28 @@ static void usb_decode_set_isoch_delay(__u8 wValue, char *str, size_t size) snprintf(str, size, "Set Isochronous Delay(Delay = %d ns)", wValue); } -/** - * usb_decode_ctrl - Returns human readable representation of control request. - * @str: buffer to return a human-readable representation of control request. - * This buffer should have about 200 bytes. - * @size: size of str buffer. - * @bRequestType: matches the USB bmRequestType field - * @bRequest: matches the USB bRequest field - * @wValue: matches the USB wValue field (CPU byte order) - * @wIndex: matches the USB wIndex field (CPU byte order) - * @wLength: matches the USB wLength field (CPU byte order) - * - * Function returns decoded, formatted and human-readable description of - * control request packet. - * - * The usage scenario for this is for tracepoints, so function as a return - * use the same value as in parameters. This approach allows to use this - * function in TP_printk - * - * Important: wValue, wIndex, wLength parameters before invoking this function - * should be processed by le16_to_cpu macro. 
- */ -const char *usb_decode_ctrl(char *str, size_t size, __u8 bRequestType, - __u8 bRequest, __u16 wValue, __u16 wIndex, - __u16 wLength) +static void usb_decode_ctrl_generic(char *str, size_t size, __u8 bRequestType, + __u8 bRequest, __u16 wValue, __u16 wIndex, + __u16 wLength) +{ + u8 recip = bRequestType & USB_RECIP_MASK; + u8 type = bRequestType & USB_TYPE_MASK; + + snprintf(str, size, + "Type=%s Recipient=%s Dir=%s bRequest=%u wValue=%u wIndex=%u wLength=%u", + (type == USB_TYPE_STANDARD) ? "Standard" : + (type == USB_TYPE_VENDOR) ? "Vendor" : + (type == USB_TYPE_CLASS) ? "Class" : "Unknown", + (recip == USB_RECIP_DEVICE) ? "Device" : + (recip == USB_RECIP_INTERFACE) ? "Interface" : + (recip == USB_RECIP_ENDPOINT) ? "Endpoint" : "Unknown", + (bRequestType & USB_DIR_IN) ? "IN" : "OUT", + bRequest, wValue, wIndex, wLength); +} + +static void usb_decode_ctrl_standard(char *str, size_t size, __u8 bRequestType, + __u8 bRequest, __u16 wValue, __u16 wIndex, + __u16 wLength) { switch (bRequest) { case USB_REQ_GET_STATUS: @@ -271,14 +269,48 @@ const char *usb_decode_ctrl(char *str, size_t size, __u8 bRequestType, usb_decode_set_isoch_delay(wValue, str, size); break; default: - snprintf(str, size, "%02x %02x %02x %02x %02x %02x %02x %02x", - bRequestType, bRequest, - (u8)(cpu_to_le16(wValue) & 0xff), - (u8)(cpu_to_le16(wValue) >> 8), - (u8)(cpu_to_le16(wIndex) & 0xff), - (u8)(cpu_to_le16(wIndex) >> 8), - (u8)(cpu_to_le16(wLength) & 0xff), - (u8)(cpu_to_le16(wLength) >> 8)); + usb_decode_ctrl_generic(str, size, bRequestType, bRequest, + wValue, wIndex, wLength); + break; + } +} + +/** + * usb_decode_ctrl - Returns human readable representation of control request. + * @str: buffer to return a human-readable representation of control request. + * This buffer should have about 200 bytes. + * @size: size of str buffer. 
+ * @bRequestType: matches the USB bmRequestType field + * @bRequest: matches the USB bRequest field + * @wValue: matches the USB wValue field (CPU byte order) + * @wIndex: matches the USB wIndex field (CPU byte order) + * @wLength: matches the USB wLength field (CPU byte order) + * + * Function returns decoded, formatted and human-readable description of + * control request packet. + * + * The usage scenario for this is for tracepoints, so function as a return + * use the same value as in parameters. This approach allows to use this + * function in TP_printk + * + * Important: wValue, wIndex, wLength parameters before invoking this function + * should be processed by le16_to_cpu macro. + */ +const char *usb_decode_ctrl(char *str, size_t size, __u8 bRequestType, + __u8 bRequest, __u16 wValue, __u16 wIndex, + __u16 wLength) +{ + switch (bRequestType & USB_TYPE_MASK) { + case USB_TYPE_STANDARD: + usb_decode_ctrl_standard(str, size, bRequestType, bRequest, + wValue, wIndex, wLength); + break; + case USB_TYPE_VENDOR: + case USB_TYPE_CLASS: + default: + usb_decode_ctrl_generic(str, size, bRequestType, bRequest, + wValue, wIndex, wLength); + break; } return str; From b95f4f905461731c6ee825225b3b47e2dc270be9 Mon Sep 17 00:00:00 2001 From: Liang He Date: Tue, 28 Jun 2022 22:10:38 +0800 Subject: [PATCH 0195/1477] clk: meson: Hold reference returned by of_get_parent() [ Upstream commit 89ab396d712f7c91fe94f55cff23460426f5fc81 ] We should hold the reference returned by of_get_parent() and use it to call of_node_put() for refcount balance. 
Fixes: 88e2da81241e ("clk: meson: aoclk: refactor common code into dedicated file") Fixes: 6682bd4d443f ("clk: meson: factorise meson64 peripheral clock controller drivers") Fixes: bb6eddd1d28c ("clk: meson: meson8b: use the HHI syscon if available") Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220628141038.168383-1-windhl@126.com Reviewed-by: Neil Armstrong Reviewed-by: Martin Blumenstingl Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/meson/meson-aoclk.c | 5 ++++- drivers/clk/meson/meson-eeclk.c | 5 ++++- drivers/clk/meson/meson8b.c | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/clk/meson/meson-aoclk.c b/drivers/clk/meson/meson-aoclk.c index 3a6d84cd6601..67d8a0d30221 100644 --- a/drivers/clk/meson/meson-aoclk.c +++ b/drivers/clk/meson/meson-aoclk.c @@ -36,6 +36,7 @@ int meson_aoclkc_probe(struct platform_device *pdev) struct meson_aoclk_reset_controller *rstc; struct meson_aoclk_data *data; struct device *dev = &pdev->dev; + struct device_node *np; struct regmap *regmap; int ret, clkid; @@ -47,7 +48,9 @@ int meson_aoclkc_probe(struct platform_device *pdev) if (!rstc) return -ENOMEM; - regmap = syscon_node_to_regmap(of_get_parent(dev->of_node)); + np = of_get_parent(dev->of_node); + regmap = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(regmap)) { dev_err(dev, "failed to get regmap\n"); return PTR_ERR(regmap); diff --git a/drivers/clk/meson/meson-eeclk.c b/drivers/clk/meson/meson-eeclk.c index a7cb1e7aedc4..18ae38787268 100644 --- a/drivers/clk/meson/meson-eeclk.c +++ b/drivers/clk/meson/meson-eeclk.c @@ -17,6 +17,7 @@ int meson_eeclkc_probe(struct platform_device *pdev) { const struct meson_eeclkc_data *data; struct device *dev = &pdev->dev; + struct device_node *np; struct regmap *map; int ret, i; @@ -25,7 +26,9 @@ int meson_eeclkc_probe(struct platform_device *pdev) return -EINVAL; /* Get the hhi system controller node */ - map = 
syscon_node_to_regmap(of_get_parent(dev->of_node)); + np = of_get_parent(dev->of_node); + map = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(map)) { dev_err(dev, "failed to get HHI regmap\n"); diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c index 862f0756b50f..1da9d212f8b7 100644 --- a/drivers/clk/meson/meson8b.c +++ b/drivers/clk/meson/meson8b.c @@ -3735,13 +3735,16 @@ static void __init meson8b_clkc_init_common(struct device_node *np, struct clk_hw_onecell_data *clk_hw_onecell_data) { struct meson8b_clk_reset *rstc; + struct device_node *parent_np; const char *notifier_clk_name; struct clk *notifier_clk; void __iomem *clk_base; struct regmap *map; int i, ret; - map = syscon_node_to_regmap(of_get_parent(np)); + parent_np = of_get_parent(np); + map = syscon_node_to_regmap(parent_np); + of_node_put(parent_np); if (IS_ERR(map)) { pr_info("failed to get HHI regmap - Trying obsolete regs\n"); From baadc6f58fa8a1b24bc463bf3431ab165fb636ef Mon Sep 17 00:00:00 2001 From: Liang He Date: Tue, 28 Jun 2022 22:31:55 +0800 Subject: [PATCH 0196/1477] clk: oxnas: Hold reference returned by of_get_parent() [ Upstream commit 1d6aa08c54cd0e005210ab8e3b1e92ede70f8a4f ] In oxnas_stdclk_probe(), we need to hold the reference returned by of_get_parent() and use it to call of_node_put() for refcount balance. 
Fixes: 0bbd72b4c64f ("clk: Add Oxford Semiconductor OXNAS Standard Clocks") Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220628143155.170550-1-windhl@126.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-oxnas.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk-oxnas.c b/drivers/clk/clk-oxnas.c index 78d5ea669fea..2fe36f579ac5 100644 --- a/drivers/clk/clk-oxnas.c +++ b/drivers/clk/clk-oxnas.c @@ -207,7 +207,7 @@ static const struct of_device_id oxnas_stdclk_dt_ids[] = { static int oxnas_stdclk_probe(struct platform_device *pdev) { - struct device_node *np = pdev->dev.of_node; + struct device_node *np = pdev->dev.of_node, *parent_np; const struct oxnas_stdclk_data *data; const struct of_device_id *id; struct regmap *regmap; @@ -219,7 +219,9 @@ static int oxnas_stdclk_probe(struct platform_device *pdev) return -ENODEV; data = id->data; - regmap = syscon_node_to_regmap(of_get_parent(np)); + parent_np = of_get_parent(np); + regmap = syscon_node_to_regmap(parent_np); + of_node_put(parent_np); if (IS_ERR(regmap)) { dev_err(&pdev->dev, "failed to have parent regmap\n"); return PTR_ERR(regmap); From dc190b46c63f9018cb4ed7155eaade9f1ba4139e Mon Sep 17 00:00:00 2001 From: Liang He Date: Tue, 28 Jun 2022 22:38:51 +0800 Subject: [PATCH 0197/1477] clk: qoriq: Hold reference returned by of_get_parent() [ Upstream commit a8ea4273bc26256ce3cce83164f0f51c5bf6e127 ] In legacy_init_clockgen(), we need to hold the reference returned by of_get_parent() and use it to call of_node_put() for refcount balance. Besides, in create_sysclk(), we also need to call of_node_put() on 'sysclk' for refcount balance.
Fixes: 0dfc86b3173f ("clk: qoriq: Move chip-specific knowledge into driver") Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220628143851.171299-1-windhl@126.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-qoriq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c index 46101c6a20f2..585b9ac11881 100644 --- a/drivers/clk/clk-qoriq.c +++ b/drivers/clk/clk-qoriq.c @@ -1038,8 +1038,13 @@ static void __init _clockgen_init(struct device_node *np, bool legacy); */ static void __init legacy_init_clockgen(struct device_node *np) { - if (!clockgen.node) - _clockgen_init(of_get_parent(np), true); + if (!clockgen.node) { + struct device_node *parent_np; + + parent_np = of_get_parent(np); + _clockgen_init(parent_np, true); + of_node_put(parent_np); + } } /* Legacy node */ @@ -1134,6 +1139,7 @@ static struct clk * __init create_sysclk(const char *name) sysclk = of_get_child_by_name(clockgen.node, "sysclk"); if (sysclk) { clk = sysclk_from_fixed(sysclk, name); + of_node_put(sysclk); if (!IS_ERR(clk)) return clk; } From 57141b1dd689caede9dc5ecf054fe314cda0d344 Mon Sep 17 00:00:00 2001 From: Liang He Date: Fri, 8 Jul 2022 16:49:00 +0800 Subject: [PATCH 0198/1477] clk: berlin: Add of_node_put() for of_get_parent() [ Upstream commit 37c381b812dcbfde9c3f1f3d3e75fdfc1b40d5bc ] In berlin2_clock_setup() and berlin2q_clock_setup(), we need to call of_node_put() for the reference returned by of_get_parent() which has increased the refcount. We should call *_put() in fail path or when it is not used anymore. 
Fixes: 26b3b6b959b2 ("clk: berlin: prepare simple-mfd conversion") Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220708084900.311684-1-windhl@126.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/berlin/bg2.c | 5 ++++- drivers/clk/berlin/bg2q.c | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/clk/berlin/bg2.c b/drivers/clk/berlin/bg2.c index bccdfa00fd37..67a9edbba29c 100644 --- a/drivers/clk/berlin/bg2.c +++ b/drivers/clk/berlin/bg2.c @@ -500,12 +500,15 @@ static void __init berlin2_clock_setup(struct device_node *np) int n, ret; clk_data = kzalloc(struct_size(clk_data, hws, MAX_CLKS), GFP_KERNEL); - if (!clk_data) + if (!clk_data) { + of_node_put(parent_np); return; + } clk_data->num = MAX_CLKS; hws = clk_data->hws; gbase = of_iomap(parent_np, 0); + of_node_put(parent_np); if (!gbase) return; diff --git a/drivers/clk/berlin/bg2q.c b/drivers/clk/berlin/bg2q.c index e9518d35f262..dd2784bb75b6 100644 --- a/drivers/clk/berlin/bg2q.c +++ b/drivers/clk/berlin/bg2q.c @@ -286,19 +286,23 @@ static void __init berlin2q_clock_setup(struct device_node *np) int n, ret; clk_data = kzalloc(struct_size(clk_data, hws, MAX_CLKS), GFP_KERNEL); - if (!clk_data) + if (!clk_data) { + of_node_put(parent_np); return; + } clk_data->num = MAX_CLKS; hws = clk_data->hws; gbase = of_iomap(parent_np, 0); if (!gbase) { + of_node_put(parent_np); pr_err("%pOF: Unable to map global base\n", np); return; } /* BG2Q CPU PLL is not part of global registers */ cpupll_base = of_iomap(parent_np, 1); + of_node_put(parent_np); if (!cpupll_base) { pr_err("%pOF: Unable to map cpupll base\n", np); iounmap(gbase); From 59e90c4d9861048d8efdbd0660721a09ed487791 Mon Sep 17 00:00:00 2001 From: Liang He Date: Mon, 4 Jul 2022 08:47:29 +0800 Subject: [PATCH 0199/1477] clk: sprd: Hold reference returned by of_get_parent() [ Upstream commit 91e6455bf715fb1558a0bf8f645ec1c131254a3c ] We should hold the reference returned by of_get_parent() and use it 
to call of_node_put() for refcount balance. Fixes: f95e8c7923d1 ("clk: sprd: support to get regmap from parent node") Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220704004729.272481-1-windhl@126.com Reviewed-by: Orson Zhai Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/sprd/common.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/clk/sprd/common.c b/drivers/clk/sprd/common.c index d620bbbcdfc8..ce81e4087a8f 100644 --- a/drivers/clk/sprd/common.c +++ b/drivers/clk/sprd/common.c @@ -41,7 +41,7 @@ int sprd_clk_regmap_init(struct platform_device *pdev, { void __iomem *base; struct device *dev = &pdev->dev; - struct device_node *node = dev->of_node; + struct device_node *node = dev->of_node, *np; struct regmap *regmap; if (of_find_property(node, "sprd,syscon", NULL)) { @@ -50,9 +50,10 @@ int sprd_clk_regmap_init(struct platform_device *pdev, pr_err("%s: failed to get syscon regmap\n", __func__); return PTR_ERR(regmap); } - } else if (of_device_is_compatible(of_get_parent(dev->of_node), - "syscon")) { - regmap = device_node_to_regmap(of_get_parent(dev->of_node)); + } else if (of_device_is_compatible(np = of_get_parent(node), "syscon") || + (of_node_put(np), 0)) { + regmap = device_node_to_regmap(np); + of_node_put(np); if (IS_ERR(regmap)) { dev_err(dev, "failed to get regmap from its parent.\n"); return PTR_ERR(regmap); From f487137a53b1a0692211f7ae82c0a7f87c30bdbe Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 23 May 2022 18:26:08 +0400 Subject: [PATCH 0200/1477] clk: tegra: Fix refcount leak in tegra210_clock_init [ Upstream commit 56c78cb1f00a9dde8cd762131ce8f4c5eb046fbb ] of_find_matching_node() returns a node pointer with refcount incremented, we should use of_node_put() on it when it is not needed anymore. Add missing of_node_put() to avoid refcount leak.
Fixes: 6b301a059eb2 ("clk: tegra: Add support for Tegra210 clocks") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220523142608.65074-1-linmq006@gmail.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/tegra/clk-tegra210.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c index 68cbb98af567..1a0016d07f88 100644 --- a/drivers/clk/tegra/clk-tegra210.c +++ b/drivers/clk/tegra/clk-tegra210.c @@ -3697,6 +3697,7 @@ static void __init tegra210_clock_init(struct device_node *np) } pmc_base = of_iomap(node, 0); + of_node_put(node); if (!pmc_base) { pr_err("Can't map pmc registers\n"); WARN_ON(1); From c01bfd23cc13a420b3f6a36bcab98410f49d480d Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 23 May 2022 18:38:34 +0400 Subject: [PATCH 0201/1477] clk: tegra: Fix refcount leak in tegra114_clock_init [ Upstream commit db16a80c76ea395766913082b1e3f939dde29b2c ] of_find_matching_node() returns a node pointer with its refcount incremented; we should call of_node_put() on it when it is no longer needed. Add the missing of_node_put() to avoid a refcount leak. 
Fixes: 2cb5efefd6f7 ("clk: tegra: Implement clocks for Tegra114") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220523143834.7587-1-linmq006@gmail.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/tegra/clk-tegra114.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c index bc9e47a4cb60..4e2b26e3e573 100644 --- a/drivers/clk/tegra/clk-tegra114.c +++ b/drivers/clk/tegra/clk-tegra114.c @@ -1317,6 +1317,7 @@ static void __init tegra114_clock_init(struct device_node *np) } pmc_base = of_iomap(node, 0); + of_node_put(node); if (!pmc_base) { pr_err("Can't map pmc registers\n"); WARN_ON(1); From 70f0a0a27d79f689defc5f5f0bd47d07813e6dea Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 23 May 2022 19:28:11 +0400 Subject: [PATCH 0202/1477] clk: tegra20: Fix refcount leak in tegra20_clock_init [ Upstream commit 4e343bafe03ff68a62f48f8235cf98f2c685468b ] of_find_matching_node() returns a node pointer with its refcount incremented; we should call of_node_put() on it when it is no longer needed. Add the missing of_node_put() to avoid a refcount leak. 
Fixes: 37c26a906527 ("clk: tegra: add clock support for Tegra20") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220523152811.19692-1-linmq006@gmail.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/tegra/clk-tegra20.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c index 3efc651b42e3..d60ee6e318a5 100644 --- a/drivers/clk/tegra/clk-tegra20.c +++ b/drivers/clk/tegra/clk-tegra20.c @@ -1128,6 +1128,7 @@ static void __init tegra20_clock_init(struct device_node *np) } pmc_base = of_iomap(node, 0); + of_node_put(node); if (!pmc_base) { pr_err("Can't map pmc registers\n"); BUG(); From 962f22e7f7698f7718d95bd9b63e41fb8cca01a9 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 4 Apr 2022 08:52:32 +0000 Subject: [PATCH 0203/1477] HSI: omap_ssi: Fix refcount leak in ssi_probe [ Upstream commit 9a2ea132df860177b33c9fd421b26c4e9a0a9396 ] When returning or breaking early from a for_each_available_child_of_node() loop, we need to explicitly call of_node_put() on the child node to possibly release the node. 
Fixes: b209e047bc74 ("HSI: Introduce OMAP SSI driver") Signed-off-by: Miaoqian Lin Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/hsi/controllers/omap_ssi_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c index 44a3f5660c10..eb9820158318 100644 --- a/drivers/hsi/controllers/omap_ssi_core.c +++ b/drivers/hsi/controllers/omap_ssi_core.c @@ -524,6 +524,7 @@ static int ssi_probe(struct platform_device *pd) if (!childpdev) { err = -ENODEV; dev_err(&pd->dev, "failed to create ssi controller port\n"); + of_node_put(child); goto out3; } } From 1f683bff1a9c7b833b66caa6f1ea07c0d37ac342 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 26 Aug 2022 12:12:27 +0200 Subject: [PATCH 0204/1477] HSI: omap_ssi_port: Fix dma_map_sg error check [ Upstream commit 551e325bbd3fb8b5a686ac1e6cf76e5641461cf2 ] dma_map_sg() returns 0 on error; in that case, return -EIO to the caller. Cc: Sebastian Reichel Cc: linux-kernel@vger.kernel.org (open list) Fixes: b209e047bc74 ("HSI: Introduce OMAP SSI driver") Signed-off-by: Jack Wang Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/hsi/controllers/omap_ssi_port.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hsi/controllers/omap_ssi_port.c b/drivers/hsi/controllers/omap_ssi_port.c index a0cb5be246e1..b9495b720f1b 100644 --- a/drivers/hsi/controllers/omap_ssi_port.c +++ b/drivers/hsi/controllers/omap_ssi_port.c @@ -230,10 +230,10 @@ static int ssi_start_dma(struct hsi_msg *msg, int lch) if (msg->ttype == HSI_MSG_READ) { err = dma_map_sg(&ssi->device, msg->sgt.sgl, msg->sgt.nents, DMA_FROM_DEVICE); - if (err < 0) { + if (!err) { dev_dbg(&ssi->device, "DMA map SG failed !\n"); pm_runtime_put_autosuspend(omap_port->pdev); - return err; + return -EIO; } csdp = SSI_DST_BURST_4x32_BIT | SSI_DST_MEMORY_PORT | SSI_SRC_SINGLE_ACCESS0 | SSI_SRC_PERIPHERAL_PORT | @@ -247,10 +247,10 @@ static 
int ssi_start_dma(struct hsi_msg *msg, int lch) } else { err = dma_map_sg(&ssi->device, msg->sgt.sgl, msg->sgt.nents, DMA_TO_DEVICE); - if (err < 0) { + if (!err) { dev_dbg(&ssi->device, "DMA map SG failed !\n"); pm_runtime_put_autosuspend(omap_port->pdev); - return err; + return -EIO; } csdp = SSI_SRC_BURST_4x32_BIT | SSI_SRC_MEMORY_PORT | SSI_DST_SINGLE_ACCESS0 | SSI_DST_PERIPHERAL_PORT | From 28cdf6c6fb7a2501eb2db511e75ae83a03e11aca Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 20 Jul 2022 16:30:03 +0200 Subject: [PATCH 0205/1477] media: exynos4-is: fimc-is: Add of_node_put() when breaking out of loop [ Upstream commit 211f8304fa21aaedc2c247f0c9d6c7f1aaa61ad7 ] In fimc_is_register_subdevs(), we need to call of_node_put() for the reference 'i2c_bus' when breaking out of the for_each_compatible_node() which has increased the refcount. Fixes: 9a761e436843 ("[media] exynos4-is: Add Exynos4x12 FIMC-IS driver") Signed-off-by: Liang He Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/exynos4-is/fimc-is.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c index dc2a144cd29b..b52d2203eac5 100644 --- a/drivers/media/platform/exynos4-is/fimc-is.c +++ b/drivers/media/platform/exynos4-is/fimc-is.c @@ -213,6 +213,7 @@ static int fimc_is_register_subdevs(struct fimc_is *is) if (ret < 0 || index >= FIMC_IS_SENSORS_NUM) { of_node_put(child); + of_node_put(i2c_bus); return ret; } index++; From 551b87976a0c3a164a75b59a34650a0b4e564b57 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Fri, 29 Jul 2022 17:17:45 +0530 Subject: [PATCH 0206/1477] tty: xilinx_uartps: Fix the ignore_status [ Upstream commit b8a6c3b3d4654fba19881cc77da61eac29f57cae ] Currently the ignore_status is not considered in the isr. Add a check to add the ignore_status. 
Fixes: 61ec9016988f ("tty/serial: add support for Xilinx PS UART") Signed-off-by: Shubhrajyoti Datta Link: https://lore.kernel.org/r/20220729114748.18332-5-shubhrajyoti.datta@xilinx.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/xilinx_uartps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index b5a8afbc452b..f7dfa123907a 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -375,6 +375,8 @@ static irqreturn_t cdns_uart_isr(int irq, void *dev_id) isrstatus &= ~CDNS_UART_IXR_TXEMPTY; } + isrstatus &= port->read_status_mask; + isrstatus &= ~port->ignore_status_mask; /* * Skip RX processing if RX is disabled as RXEMPTY will never be set * as read bytes will not be removed from the FIFO. From 40aa0999a3e43b14e925db835aeb7535dbdad671 Mon Sep 17 00:00:00 2001 From: Xu Qiang Date: Thu, 18 Aug 2022 08:57:53 +0200 Subject: [PATCH 0207/1477] media: meson: vdec: add missing clk_disable_unprepare on error in vdec_hevc_start() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4029372233e13e281f8c387f279f9f064ced3810 ] Add the missing clk_disable_unprepare() before return from vdec_hevc_start() in the error handling case. 
Fixes: 823a7300340e (“media: meson: vdec: add common HEVC decoder support”) Signed-off-by: Xu Qiang Reviewed-by: Neil Armstrong Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/staging/media/meson/vdec/vdec_hevc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/staging/media/meson/vdec/vdec_hevc.c b/drivers/staging/media/meson/vdec/vdec_hevc.c index 9530e580e57a..afced435c907 100644 --- a/drivers/staging/media/meson/vdec/vdec_hevc.c +++ b/drivers/staging/media/meson/vdec/vdec_hevc.c @@ -167,8 +167,12 @@ static int vdec_hevc_start(struct amvdec_session *sess) clk_set_rate(core->vdec_hevc_clk, 666666666); ret = clk_prepare_enable(core->vdec_hevc_clk); - if (ret) + if (ret) { + if (core->platform->revision == VDEC_REVISION_G12A || + core->platform->revision == VDEC_REVISION_SM1) + clk_disable_unprepare(core->vdec_hevcf_clk); return ret; + } if (core->platform->revision == VDEC_REVISION_SM1) regmap_update_bits(core->regmap_ao, AO_RTI_GEN_PWR_SLEEP0, From 2630cc88327a5557aa0d9cc63be95e3c6e0a55b3 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 1 Jun 2022 06:25:14 +0200 Subject: [PATCH 0208/1477] media: xilinx: vipp: Fix refcount leak in xvip_graph_dma_init [ Upstream commit 1c78f19c3a0ea312a8178a6bfd8934eb93e9b10a ] of_get_child_by_name() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() to avoid refcount leak. 
Fixes: df3305156f98 ("[media] v4l: xilinx: Add Xilinx Video IP core") Signed-off-by: Miaoqian Lin Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/xilinx/xilinx-vipp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/xilinx/xilinx-vipp.c b/drivers/media/platform/xilinx/xilinx-vipp.c index cc2856efea59..f2b0c490187c 100644 --- a/drivers/media/platform/xilinx/xilinx-vipp.c +++ b/drivers/media/platform/xilinx/xilinx-vipp.c @@ -472,7 +472,7 @@ static int xvip_graph_dma_init(struct xvip_composite_device *xdev) { struct device_node *ports; struct device_node *port; - int ret; + int ret = 0; ports = of_get_child_by_name(xdev->dev->of_node, "ports"); if (ports == NULL) { @@ -482,13 +482,14 @@ static int xvip_graph_dma_init(struct xvip_composite_device *xdev) for_each_child_of_node(ports, port) { ret = xvip_graph_dma_init_one(xdev, port); - if (ret < 0) { + if (ret) { of_node_put(port); - return ret; + break; } } - return 0; + of_node_put(ports); + return ret; } static void xvip_graph_cleanup(struct xvip_composite_device *xdev) From cdce36a88def550773142a34ef727a830cad96a8 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sun, 21 Aug 2022 21:16:13 -0400 Subject: [PATCH 0209/1477] RDMA/rxe: Fix "kernel NULL pointer dereference" error [ Upstream commit a625ca30eff806395175ebad3ac1399014bdb280 ] When rxe_queue_init in the function rxe_qp_init_req fails, both qp->req.task.func and qp->req.task.arg are not initialized. Because of creation of qp fails, the function rxe_create_qp will call rxe_qp_do_cleanup to handle allocated resource. Before calling __rxe_do_task, both qp->req.task.func and qp->req.task.arg should be checked. 
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20220822011615.805603-2-yanjun.zhu@linux.dev Reported-by: syzbot+ab99dc4c6e961eed8b8e@syzkaller.appspotmail.com Signed-off-by: Zhu Yanjun Reviewed-by: Li Zhijian Reviewed-by: Bob Pearson Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 2847ab4d9a5f..6acef6e923de 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -775,7 +775,9 @@ void rxe_qp_destroy(struct rxe_qp *qp) rxe_cleanup_task(&qp->comp.task); /* flush out any receive wr's or pending requests */ - __rxe_do_task(&qp->req.task); + if (qp->req.task.func) + __rxe_do_task(&qp->req.task); + if (qp->sq.queue) { __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); From cf3bb86edd8fdcbd8a4743f212ef9b8fd6644350 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sun, 21 Aug 2022 21:16:14 -0400 Subject: [PATCH 0210/1477] RDMA/rxe: Fix the error caused by qp->sk [ Upstream commit 548ce2e66725dcba4e27d1e8ac468d5dd17fd509 ] When sock_create_kern in the function rxe_qp_init_req fails, qp->sk is set to NULL. Then the function rxe_create_qp will call rxe_qp_do_cleanup to handle allocated resource. Before handling qp->sk, this variable should be checked. 
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20220822011615.805603-3-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Reviewed-by: Li Zhijian Reviewed-by: Bob Pearson Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_qp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 6acef6e923de..2e4b008f0387 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -817,8 +817,10 @@ static void rxe_qp_do_cleanup(struct work_struct *work) free_rd_atomic_resources(qp); - kernel_sock_shutdown(qp->sk, SHUT_RDWR); - sock_release(qp->sk); + if (qp->sk) { + kernel_sock_shutdown(qp->sk, SHUT_RDWR); + sock_release(qp->sk); + } } /* called when the last reference to the qp is dropped */ From 7ba19a60c74fb0057d4daef2fa2cbfc9522f3ba1 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Wed, 24 Aug 2022 16:26:00 +0800 Subject: [PATCH 0211/1477] misc: ocxl: fix possible refcount leak in afu_ioctl() [ Upstream commit c3b69ba5114c860d730870c03ab4ee45276e5e35 ] eventfd_ctx_put need to be called to put the refcount that gotten by eventfd_ctx_fdget when ocxl_irq_set_handler fails. 
Fixes: 060146614643 ("ocxl: move event_fd handling to frontend") Acked-by: Frederic Barrat Signed-off-by: Hangyu Hua Link: https://lore.kernel.org/r/20220824082600.36159-1-hbh25y@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/ocxl/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index c742ab02ae18..e094809b54ff 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -259,6 +259,8 @@ static long afu_ioctl(struct file *file, unsigned int cmd, if (IS_ERR(ev_ctx)) return PTR_ERR(ev_ctx); rc = ocxl_irq_set_handler(ctx, irq_id, irq_handler, irq_free, ev_ctx); + if (rc) + eventfd_ctx_put(ev_ctx); break; case OCXL_IOCTL_GET_METADATA: From f59861946fa51bcc1f305809e4ebc1013b0ee61c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 1 Sep 2022 08:18:45 +0300 Subject: [PATCH 0212/1477] fpga: prevent integer overflow in dfl_feature_ioctl_set_irq() [ Upstream commit 939bc5453b8cbdde9f1e5110ce8309aedb1b501a ] The "hdr.count * sizeof(s32)" multiplication can overflow on 32 bit systems leading to memory corruption. Use array_size() to fix that. 
Fixes: 322b598be4d9 ("fpga: dfl: introduce interrupt trigger setting API") Signed-off-by: Dan Carpenter Acked-by: Xu Yilun Link: https://lore.kernel.org/r/YxBAtYCM38dM7yzI@kili Signed-off-by: Xu Yilun Signed-off-by: Sasha Levin --- drivers/fpga/dfl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c index b450870b75ed..eb8a6e329af9 100644 --- a/drivers/fpga/dfl.c +++ b/drivers/fpga/dfl.c @@ -1857,7 +1857,7 @@ long dfl_feature_ioctl_set_irq(struct platform_device *pdev, return -EINVAL; fds = memdup_user((void __user *)(arg + sizeof(hdr)), - hdr.count * sizeof(s32)); + array_size(hdr.count, sizeof(s32))); if (IS_ERR(fds)) return PTR_ERR(fds); From d5065ca461a4a0abd8682b6934cbe228a7f2d8c1 Mon Sep 17 00:00:00 2001 From: Jie Hai Date: Tue, 30 Aug 2022 14:22:45 +0800 Subject: [PATCH 0213/1477] dmaengine: hisilicon: Disable channels when unregister hisi_dma [ Upstream commit e3bdaa04ada31f46d0586df83a2789b8913053c5 ] When hisi_dma is unloaded or unbound, all of its channels should be disabled. This patch disables the DMA channels when the driver is unloaded or unbound. 
Fixes: e9f08b65250d ("dmaengine: hisilicon: Add Kunpeng DMA engine support") Signed-off-by: Jie Hai Acked-by: Zhou Wang Link: https://lore.kernel.org/r/20220830062251.52993-2-haijie1@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/hisi_dma.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index 3e83769615d1..7cedf91e86a9 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -185,7 +185,8 @@ static void hisi_dma_reset_qp_point(struct hisi_dma_dev *hdma_dev, u32 index) hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR, index, 0); } -static void hisi_dma_reset_hw_chan(struct hisi_dma_chan *chan) +static void hisi_dma_reset_or_disable_hw_chan(struct hisi_dma_chan *chan, + bool disable) { struct hisi_dma_dev *hdma_dev = chan->hdma_dev; u32 index = chan->qp_num, tmp; @@ -206,8 +207,11 @@ static void hisi_dma_reset_hw_chan(struct hisi_dma_chan *chan) hisi_dma_do_reset(hdma_dev, index); hisi_dma_reset_qp_point(hdma_dev, index); hisi_dma_pause_dma(hdma_dev, index, false); - hisi_dma_enable_dma(hdma_dev, index, true); - hisi_dma_unmask_irq(hdma_dev, index); + + if (!disable) { + hisi_dma_enable_dma(hdma_dev, index, true); + hisi_dma_unmask_irq(hdma_dev, index); + } ret = readl_relaxed_poll_timeout(hdma_dev->base + HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET, tmp, @@ -223,7 +227,7 @@ static void hisi_dma_free_chan_resources(struct dma_chan *c) struct hisi_dma_chan *chan = to_hisi_dma_chan(c); struct hisi_dma_dev *hdma_dev = chan->hdma_dev; - hisi_dma_reset_hw_chan(chan); + hisi_dma_reset_or_disable_hw_chan(chan, false); vchan_free_chan_resources(&chan->vc); memset(chan->sq, 0, sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth); @@ -399,7 +403,7 @@ static void hisi_dma_enable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index) static void hisi_dma_disable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index) { - 
hisi_dma_reset_hw_chan(&hdma_dev->chan[qp_index]); + hisi_dma_reset_or_disable_hw_chan(&hdma_dev->chan[qp_index], true); } static void hisi_dma_enable_qps(struct hisi_dma_dev *hdma_dev) From d3fd838536dff43adb4dda34fd15ea3a574125f9 Mon Sep 17 00:00:00 2001 From: Jie Hai Date: Tue, 30 Aug 2022 14:22:46 +0800 Subject: [PATCH 0214/1477] dmaengine: hisilicon: Fix CQ head update [ Upstream commit 94477a79cf80e8ab55b68f14bc579a12ddea1e0b ] After completion of data transfer of one or multiple descriptors, the completion status and the current head pointer to submission queue are written into the CQ and interrupt can be generated to inform the software. In interrupt process CQ is read and cq_head is updated. hisi_dma_irq updates cq_head only when the completion status is success. When an abnormal interrupt reports, cq_head will not update which will cause subsequent interrupt processes read the error CQ and never report the correct status. This patch updates cq_head whenever CQ is accessed. Fixes: e9f08b65250d ("dmaengine: hisilicon: Add Kunpeng DMA engine support") Signed-off-by: Jie Hai Acked-by: Zhou Wang Link: https://lore.kernel.org/r/20220830062251.52993-3-haijie1@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/hisi_dma.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index 7cedf91e86a9..08ec90dd4c46 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -442,12 +442,10 @@ static irqreturn_t hisi_dma_irq(int irq, void *data) desc = chan->desc; cqe = chan->cq + chan->cq_head; if (desc) { + chan->cq_head = (chan->cq_head + 1) % hdma_dev->chan_depth; + hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR, + chan->qp_num, chan->cq_head); if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) { - chan->cq_head = (chan->cq_head + 1) % - hdma_dev->chan_depth; - hisi_dma_chan_write(hdma_dev->base, - HISI_DMA_CQ_HEAD_PTR, chan->qp_num, - chan->cq_head); 
vchan_cookie_complete(&desc->vd); } else { dev_err(&hdma_dev->pdev->dev, "task error!\n"); From af12e209a9d559394d35875ba0e6c80407605888 Mon Sep 17 00:00:00 2001 From: Jie Hai Date: Tue, 30 Aug 2022 14:22:47 +0800 Subject: [PATCH 0215/1477] dmaengine: hisilicon: Add multi-thread support for a DMA channel [ Upstream commit 2cbb95883c990d0002a77e13d3278913ab26ad79 ] When we get a DMA channel and try to use it in multiple threads it will cause oops and hanging the system. % echo 100 > /sys/module/dmatest/parameters/threads_per_chan % echo 100 > /sys/module/dmatest/parameters/iterations % echo 1 > /sys/module/dmatest/parameters/run [383493.327077] Unable to handle kernel paging request at virtual address dead000000000108 [383493.335103] Mem abort info: [383493.335103] ESR = 0x96000044 [383493.335105] EC = 0x25: DABT (current EL), IL = 32 bits [383493.335107] SET = 0, FnV = 0 [383493.335108] EA = 0, S1PTW = 0 [383493.335109] FSC = 0x04: level 0 translation fault [383493.335110] Data abort info: [383493.335111] ISV = 0, ISS = 0x00000044 [383493.364739] CM = 0, WnR = 1 [383493.367793] [dead000000000108] address between user and kernel address ranges [383493.375021] Internal error: Oops: 96000044 [#1] PREEMPT SMP [383493.437574] CPU: 63 PID: 27895 Comm: dma0chan0-copy2 Kdump: loaded Tainted: GO 5.17.0-rc4+ #2 [383493.457851] pstate: 204000c9 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [383493.465331] pc : vchan_tx_submit+0x64/0xa0 [383493.469957] lr : vchan_tx_submit+0x34/0xa0 This occurs because the transmission timed out, and that's due to data race. Each thread rewrite channels's descriptor as soon as device_issue_pending is called. It leads to the situation that the driver thinks that it uses the right descriptor in interrupt handler while channels's descriptor has been changed by other thread. The descriptor which in fact reported interrupt will not be handled any more, as well as its tx->callback. That's why timeout reports. 
With current fixes channels' descriptor changes it's value only when it has been used. A new descriptor is acquired from vc->desc_issued queue that is already filled with descriptors that are ready to be sent. Threads have no direct access to DMA channel descriptor. In case of channel's descriptor is busy, try to submit to HW again when a descriptor is completed. In this case, vc->desc_issued may be empty when hisi_dma_start_transfer is called, so delete error reporting on this. Now it is just possible to queue a descriptor for further processing. Fixes: e9f08b65250d ("dmaengine: hisilicon: Add Kunpeng DMA engine support") Signed-off-by: Jie Hai Acked-by: Zhou Wang Link: https://lore.kernel.org/r/20220830062251.52993-4-haijie1@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/hisi_dma.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index 08ec90dd4c46..8f1651367310 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -276,7 +276,6 @@ static void hisi_dma_start_transfer(struct hisi_dma_chan *chan) vd = vchan_next_desc(&chan->vc); if (!vd) { - dev_err(&hdma_dev->pdev->dev, "no issued task!\n"); chan->desc = NULL; return; } @@ -308,7 +307,7 @@ static void hisi_dma_issue_pending(struct dma_chan *c) spin_lock_irqsave(&chan->vc.lock, flags); - if (vchan_issue_pending(&chan->vc)) + if (vchan_issue_pending(&chan->vc) && !chan->desc) hisi_dma_start_transfer(chan); spin_unlock_irqrestore(&chan->vc.lock, flags); @@ -447,11 +446,10 @@ static irqreturn_t hisi_dma_irq(int irq, void *data) chan->qp_num, chan->cq_head); if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) { vchan_cookie_complete(&desc->vd); + hisi_dma_start_transfer(chan); } else { dev_err(&hdma_dev->pdev->dev, "task error!\n"); } - - chan->desc = NULL; } spin_unlock_irqrestore(&chan->vc.lock, flags); From 5047bd3bd739e3d7cc825d342363c6b0d0b46549 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Sun, 
4 Sep 2022 15:40:38 -0600 Subject: [PATCH 0216/1477] dyndbg: fix static_branch manipulation [ Upstream commit ee879be38bc87f8cedc79ae2742958db6533ca59 ] In https://lore.kernel.org/lkml/20211209150910.GA23668@axis.com/ Vincent's patch commented on, and worked around, a bug toggling static_branch's, when a 2nd PRINTK-ish flag was added. The bug results in a premature static_branch_disable when the 1st of 2 flags was disabled. The cited commit computed newflags, but then in the JUMP_LABEL block, failed to use that result, instead using just one of the terms in it. Using newflags instead made the code work properly. This is Vincents test-case, reduced. It needs the 2nd flag to demonstrate the bug, but it's explanatory here. pt_test() { echo 5 > /sys/module/dynamic_debug/verbose site="module tcp" # just one callsite echo " $site =_ " > /proc/dynamic_debug/control # clear it # A B ~A ~B for flg in +T +p "-T #broke here" -p; do echo " $site $flg " > /proc/dynamic_debug/control done; # A B ~B ~A for flg in +T +p "-p #broke here" -T; do echo " $site $flg " > /proc/dynamic_debug/control done } pt_test Fixes: 84da83a6ffc0 dyndbg: combine flags & mask into a struct, simplify with it CC: vincent.whitchurch@axis.com Acked-by: Jason Baron Acked-by: Daniel Vetter Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20220904214134.408619-2-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- lib/dynamic_debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 921d0a654243..e67655d7b7cb 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -207,10 +207,11 @@ static int ddebug_change(const struct ddebug_query *query, continue; #ifdef CONFIG_JUMP_LABEL if (dp->flags & _DPRINTK_FLAGS_PRINT) { - if (!(modifiers->flags & _DPRINTK_FLAGS_PRINT)) + if (!(newflags & _DPRINTK_FLAGS_PRINT)) static_branch_disable(&dp->key.dd_key_true); - } else if (modifiers->flags & 
_DPRINTK_FLAGS_PRINT) + } else if (newflags & _DPRINTK_FLAGS_PRINT) { static_branch_enable(&dp->key.dd_key_true); + } #endif dp->flags = newflags; v2pr_info("changed %s:%d [%s]%s =%s\n", From c0e206da44e572f1ef0d82f1a42e71b166c0746a Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Sun, 4 Sep 2022 15:40:39 -0600 Subject: [PATCH 0217/1477] dyndbg: fix module.dyndbg handling [ Upstream commit 85d6b66d31c35158364058ee98fb69ab5bb6a6b1 ] For CONFIG_DYNAMIC_DEBUG=N, the ddebug_dyndbg_module_param_cb() stub-fn is too permissive: bash-5.1# modprobe drm JUNKdyndbg bash-5.1# modprobe drm dyndbgJUNK [ 42.933220] dyndbg param is supported only in CONFIG_DYNAMIC_DEBUG builds [ 42.937484] ACPI: bus type drm_connector registered This caused no ill effects, because unknown parameters are either ignored by default with an "unknown parameter" warning, or ignored because dyndbg allows its no-effect use on non-dyndbg builds. But since the code has an explicit feedback message, it should be issued accurately. Fix with strcmp for exact param-name match. 
Fixes: b48420c1d301 dynamic_debug: make dynamic-debug work for module initialization Reported-by: Rasmus Villemoes Acked-by: Jason Baron Acked-by: Daniel Vetter Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20220904214134.408619-3-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- include/linux/dynamic_debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index a57ee75342cf..b0b23679b2c2 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -196,7 +196,7 @@ static inline int ddebug_remove_module(const char *mod) static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *modname) { - if (strstr(param, "dyndbg")) { + if (!strcmp(param, "dyndbg")) { /* avoid pr_warn(), which wants pr_fmt() fully defined */ printk(KERN_WARNING "dyndbg param is supported only in " "CONFIG_DYNAMIC_DEBUG builds\n"); From 1d6598558914b68415ba56bc85a30d02222c9587 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Sun, 4 Sep 2022 15:40:44 -0600 Subject: [PATCH 0218/1477] dyndbg: let query-modname override actual module name [ Upstream commit e75ef56f74965f426dd819a41336b640ffdd8fbc ] dyndbg's control-parser: ddebug_parse_query(), requires that search terms: module, func, file, lineno, are used only once in a query; a thing cannot be named both foo and bar. The cited commit added an overriding module modname, taken from the module loader, which is authoritative. So it set query.module 1st, which disallowed its use in the query-string. But now, its useful to allow a module-load to enable classes across a whole (or part of) a subsystem at once. 
# enable (dynamic-debug in) drm only modprobe drm dyndbg="class DRM_UT_CORE +p" # get drm_helper too modprobe drm dyndbg="class DRM_UT_CORE module drm* +p" # get everything that knows DRM_UT_CORE modprobe drm dyndbg="class DRM_UT_CORE module * +p" # also for boot-args: drm.dyndbg="class DRM_UT_CORE module * +p" So convert the override into a default, by filling it only when/after the query-string omitted the module. NB: the query class FOO handling is forthcoming. Fixes: 8e59b5cfb9a6 dynamic_debug: add modname arg to exec_query callchain Acked-by: Jason Baron Acked-by: Daniel Vetter Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20220904214134.408619-8-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- lib/dynamic_debug.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index e67655d7b7cb..02a1a6496375 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -380,10 +380,6 @@ static int ddebug_parse_query(char *words[], int nwords, return -EINVAL; } - if (modname) - /* support $modname.dyndbg= */ - query->module = modname; - for (i = 0; i < nwords; i += 2) { char *keyword = words[i]; char *arg = words[i+1]; @@ -424,6 +420,13 @@ static int ddebug_parse_query(char *words[], int nwords, if (rc) return rc; } + if (!query->module && modname) + /* + * support $modname.dyndbg=, when + * not given in the query itself + */ + query->module = modname; + vpr_info_dq(query, "parsed"); return 0; } From d06cc0e11d5bac31ac60f552bc93a9a6300bcbe2 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Sun, 4 Sep 2022 15:40:46 -0600 Subject: [PATCH 0219/1477] dyndbg: drop EXPORTed dynamic_debug_exec_queries [ Upstream commit e26ef3af964acfea311403126acee8c56c89e26b ] This exported fn is unused, and will not be needed. Lets dump it. The export was added to let drm control pr_debugs, as part of using them to avoid drm_debug_enabled overheads. 
But its better to just implement the drm.debug bitmap interface, then its available for everyone. Fixes: a2d375eda771 ("dyndbg: refine export, rename to dynamic_debug_exec_queries()") Fixes: 4c0d77828d4f ("dyndbg: export ddebug_exec_queries") Acked-by: Jason Baron Acked-by: Daniel Vetter Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20220904214134.408619-10-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- include/linux/dynamic_debug.h | 9 --------- lib/dynamic_debug.c | 29 ----------------------------- 2 files changed, 38 deletions(-) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index b0b23679b2c2..c0c6ea9ea7e3 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -50,9 +50,6 @@ struct _ddebug { #if defined(CONFIG_DYNAMIC_DEBUG_CORE) -/* exported for module authors to exercise >control */ -int dynamic_debug_exec_queries(const char *query, const char *modname); - int ddebug_add_module(struct _ddebug *tab, unsigned int n, const char *modname); extern int ddebug_remove_module(const char *mod_name); @@ -216,12 +213,6 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, rowsize, groupsize, buf, len, ascii); \ } while (0) -static inline int dynamic_debug_exec_queries(const char *query, const char *modname) -{ - pr_warn("kernel not built with CONFIG_DYNAMIC_DEBUG_CORE\n"); - return 0; -} - #endif /* !CONFIG_DYNAMIC_DEBUG_CORE */ #endif diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 02a1a6496375..10a50c03074e 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -552,35 +552,6 @@ static int ddebug_exec_queries(char *query, const char *modname) return nfound; } -/** - * dynamic_debug_exec_queries - select and change dynamic-debug prints - * @query: query-string described in admin-guide/dynamic-debug-howto - * @modname: string containing module name, usually &module.mod_name - * - * This uses the >/proc/dynamic_debug/control 
reader, allowing module - * authors to modify their dynamic-debug callsites. The modname is - * canonically struct module.mod_name, but can also be null or a - * module-wildcard, for example: "drm*". - */ -int dynamic_debug_exec_queries(const char *query, const char *modname) -{ - int rc; - char *qry; /* writable copy of query */ - - if (!query) { - pr_err("non-null query/command string expected\n"); - return -EINVAL; - } - qry = kstrndup(query, PAGE_SIZE, GFP_KERNEL); - if (!qry) - return -ENOMEM; - - rc = ddebug_exec_queries(qry, modname); - kfree(qry); - return rc; -} -EXPORT_SYMBOL_GPL(dynamic_debug_exec_queries); - #define PREFIX_SIZE 64 static int remaining(int wrote) From f87f720811323a83f701679632b479b1e92fdccd Mon Sep 17 00:00:00 2001 From: William Dean Date: Fri, 22 Jul 2022 17:16:44 +0800 Subject: [PATCH 0220/1477] mtd: devices: docg3: check the return value of devm_ioremap() in the probe [ Upstream commit 26e784433e6c65735cd6d93a8db52531970d9a60 ] The function devm_ioremap() in docg3_probe() can fail, so its return value should be checked. 
Fixes: 82402aeb8c81e ("mtd: docg3: Use devm_*() functions") Reported-by: Hacash Robot Signed-off-by: William Dean Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220722091644.2937953-1-williamsukatube@163.com Signed-off-by: Sasha Levin --- drivers/mtd/devices/docg3.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c index a030792115bc..fa42473d04c1 100644 --- a/drivers/mtd/devices/docg3.c +++ b/drivers/mtd/devices/docg3.c @@ -1975,9 +1975,14 @@ static int __init docg3_probe(struct platform_device *pdev) dev_err(dev, "No I/O memory resource defined\n"); return ret; } - base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE); ret = -ENOMEM; + base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE); + if (!base) { + dev_err(dev, "devm_ioremap dev failed\n"); + return ret; + } + cascade = devm_kcalloc(dev, DOC_MAX_NBFLOORS, sizeof(*cascade), GFP_KERNEL); if (!cascade) From 3a9d7d8dcf9803783fbc1066bb6d022e75af232b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 7 Jul 2022 20:43:28 +0200 Subject: [PATCH 0221/1477] mtd: rawnand: fsl_elbc: Fix none ECC mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 049e43b9fd8fd2966940485da163d67e96ee3fea ] Commit f6424c22aa36 ("mtd: rawnand: fsl_elbc: Make SW ECC work") added support for specifying ECC mode via DTS and skipping autodetection. But it broke explicit specification of HW ECC mode in DTS as correct settings for HW ECC mode are applied only when NONE mode or nothing was specified in DTS file. Also it started aliasing NONE mode to be same as when ECC mode was not specified and disallowed usage of ON_DIE mode. Fix all these issues. Use autodetection of ECC mode only in case when mode was really not specified in DTS file by checking that ecc value is invalid. Set HW ECC settings either when HW ECC was specified in DTS or it was autodetected. 
And do not fail when ON_DIE mode is set. Fixes: f6424c22aa36 ("mtd: rawnand: fsl_elbc: Make SW ECC work") Signed-off-by: Pali Rohár Reviewed-by: Marek Behún Reviewed-by: Marek Behún Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220707184328.3845-1-pali@kernel.org Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/fsl_elbc_nand.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c index b2af7f81fdf8..c174b6dc3c6b 100644 --- a/drivers/mtd/nand/raw/fsl_elbc_nand.c +++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c @@ -727,36 +727,40 @@ static int fsl_elbc_attach_chip(struct nand_chip *chip) struct fsl_lbc_regs __iomem *lbc = ctrl->regs; unsigned int al; - switch (chip->ecc.engine_type) { /* * if ECC was not chosen in DT, decide whether to use HW or SW ECC from * CS Base Register */ - case NAND_ECC_ENGINE_TYPE_NONE: + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_INVALID) { /* If CS Base Register selects full hardware ECC then use it */ if ((in_be32(&lbc->bank[priv->bank].br) & BR_DECC) == BR_DECC_CHK_GEN) { - chip->ecc.read_page = fsl_elbc_read_page; - chip->ecc.write_page = fsl_elbc_write_page; - chip->ecc.write_subpage = fsl_elbc_write_subpage; - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; - mtd_set_ooblayout(mtd, &fsl_elbc_ooblayout_ops); - chip->ecc.size = 512; - chip->ecc.bytes = 3; - chip->ecc.strength = 1; } else { /* otherwise fall back to default software ECC */ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; chip->ecc.algo = NAND_ECC_ALGO_HAMMING; } + } + + switch (chip->ecc.engine_type) { + /* if HW ECC was chosen, setup ecc and oob layout */ + case NAND_ECC_ENGINE_TYPE_ON_HOST: + chip->ecc.read_page = fsl_elbc_read_page; + chip->ecc.write_page = fsl_elbc_write_page; + chip->ecc.write_subpage = fsl_elbc_write_subpage; + mtd_set_ooblayout(mtd, &fsl_elbc_ooblayout_ops); + chip->ecc.size = 512; + 
chip->ecc.bytes = 3; + chip->ecc.strength = 1; break; - /* if SW ECC was chosen in DT, we do not need to set anything here */ + /* if none or SW ECC was chosen, we do not need to set anything here */ + case NAND_ECC_ENGINE_TYPE_NONE: case NAND_ECC_ENGINE_TYPE_SOFT: + case NAND_ECC_ENGINE_TYPE_ON_DIE: break; - /* should we also implement *_ECC_ENGINE_CONTROLLER to do as above? */ default: return -EINVAL; } From e3917c85f41ef1df64e27dc0e46ab0d803c5e73e Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Tue, 20 Sep 2022 10:12:02 +0200 Subject: [PATCH 0222/1477] RDMA/siw: Always consume all skbuf data in sk_data_ready() upcall. [ Upstream commit 754209850df8367c954ac1de7671c7430b1f342c ] For header and trailer/padding processing, siw did not consume new skb data until minimum amount present to fill current header or trailer structure, including potential payload padding. Not consuming any data during upcall may cause a receive stall, since tcp_read_sock() is not upcalling again if no new data arrive. A NFSoRDMA client got stuck at RDMA Write reception of unaligned payload, if the current skb did contain only the expected 3 padding bytes, but not the 4 bytes CRC trailer. Expecting 4 more bytes already arrived in another skb, and not consuming those 3 bytes in the current upcall left the Write incomplete, waiting for the CRC forever. 
Fixes: 8b6a361b8c48 ("rdma/siw: receive path") Reported-by: Olga Kornievskaia Tested-by: Olga Kornievskaia Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20220920081202.223629-1-bmt@zurich.ibm.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/sw/siw/siw_qp_rx.c | 29 +++++++++++++++------------ 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 875ea6f1b04a..fd721cc19682 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -961,27 +961,28 @@ out: static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx) { struct sk_buff *skb = srx->skb; + int avail = min(srx->skb_new, srx->fpdu_part_rem); u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad; __wsum crc_in, crc_own = 0; siw_dbg_qp(qp, "expected %d, available %d, pad %u\n", srx->fpdu_part_rem, srx->skb_new, srx->pad); - if (srx->skb_new < srx->fpdu_part_rem) + skb_copy_bits(skb, srx->skb_offset, tbuf, avail); + + srx->skb_new -= avail; + srx->skb_offset += avail; + srx->skb_copied += avail; + srx->fpdu_part_rem -= avail; + + if (srx->fpdu_part_rem) return -EAGAIN; - skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem); - - if (srx->mpa_crc_hd && srx->pad) - crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad); - - srx->skb_new -= srx->fpdu_part_rem; - srx->skb_offset += srx->fpdu_part_rem; - srx->skb_copied += srx->fpdu_part_rem; - if (!srx->mpa_crc_hd) return 0; + if (srx->pad) + crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad); /* * CRC32 is computed, transmitted and received directly in NBO, * so there's never a reason to convert byte order. @@ -1083,10 +1084,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx) * completely received. 
*/ if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) { - bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR; + int hdrlen = iwarp_pktinfo[opcode].hdr_len; - if (srx->skb_new < bytes) - return -EAGAIN; + bytes = min_t(int, hdrlen - MIN_DDP_HDR, srx->skb_new); skb_copy_bits(skb, srx->skb_offset, (char *)c_hdr + srx->fpdu_part_rcvd, bytes); @@ -1096,6 +1096,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx) srx->skb_new -= bytes; srx->skb_offset += bytes; srx->skb_copied += bytes; + + if (srx->fpdu_part_rcvd < hdrlen) + return -EAGAIN; } /* From fc61a0c8200ab7c058f69df94d3fb32edbde1dce Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 16 Sep 2022 14:28:32 +0200 Subject: [PATCH 0223/1477] ata: fix ata_id_sense_reporting_enabled() and ata_id_has_sense_reporting() [ Upstream commit 690aa8c3ae308bc696ec8b1b357b995193927083 ] ACS-5 section 7.13.6.41 Words 85..87, 120: Commands and feature sets supported or enabled states that: If bit 15 of word 86 is set to one, bit 14 of word 119 is set to one, and bit 15 of word 119 is cleared to zero, then word 119 is valid. If bit 15 of word 86 is set to one, bit 14 of word 120 is set to one, and bit 15 of word 120 is cleared to zero, then word 120 is valid. (This text also exists in really old ACS standards, e.g. ACS-3.) Currently, ata_id_sense_reporting_enabled() and ata_id_has_sense_reporting() both check bit 15 of word 86, but neither of them check that bit 14 of word 119 is set to one, or that bit 15 of word 119 is cleared to zero. Additionally, make ata_id_sense_reporting_enabled() return false if !ata_id_has_sense_reporting(), similar to how e.g. ata_id_flush_ext_enabled() returns false if !ata_id_has_flush_ext(). 
Fixes: e87fd28cf9a2 ("libata: Implement support for sense data reporting") Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- include/linux/ata.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/ata.h b/include/linux/ata.h index 6e67aded28f8..734cc646ce35 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -770,16 +770,21 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id) static inline bool ata_id_has_sense_reporting(const u16 *id) { - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + if (!(id[ATA_ID_CFS_ENABLE_2] & BIT(15))) return false; - return id[ATA_ID_COMMAND_SET_3] & (1 << 6); + if ((id[ATA_ID_COMMAND_SET_3] & (BIT(15) | BIT(14))) != BIT(14)) + return false; + return id[ATA_ID_COMMAND_SET_3] & BIT(6); } static inline bool ata_id_sense_reporting_enabled(const u16 *id) { - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + if (!ata_id_has_sense_reporting(id)) return false; - return id[ATA_ID_COMMAND_SET_4] & (1 << 6); + /* ata_id_has_sense_reporting() == true, word 86 must have bit 15 set */ + if ((id[ATA_ID_COMMAND_SET_4] & (BIT(15) | BIT(14))) != BIT(14)) + return false; + return id[ATA_ID_COMMAND_SET_4] & BIT(6); } /** From 3c34a91c8aa7ba97822b80e748e3ce0a47bc8807 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 16 Sep 2022 14:28:33 +0200 Subject: [PATCH 0224/1477] ata: fix ata_id_has_devslp() [ Upstream commit 9c6e09a434e1317e09b78b3b69cd384022ec9a03 ] ACS-5 section 7.13.6.36 Word 78: Serial ATA features supported states that: If word 76 is not 0000h or FFFFh, word 78 reports the features supported by the device. If this word is not supported, the word shall be cleared to zero. (This text also exists in really old ACS standards, e.g. ACS-3.) Additionally, move the macro to the other ATA_ID_FEATURE_SUPP macros (which already have this check), thus making it more likely that the next ATA_ID_FEATURE_SUPP macro that is added will include this check. 
Fixes: 65fe1f0f66a5 ("ahci: implement aggressive SATA device sleep support") Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- include/linux/ata.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/ata.h b/include/linux/ata.h index 734cc646ce35..8b884cd3a232 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -565,6 +565,10 @@ struct ata_bmdma_prd { ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 2))) +#define ata_id_has_devslp(id) \ + ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ + ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))) #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10)) #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11)) #define ata_id_u32(id,n) \ @@ -577,7 +581,6 @@ struct ata_bmdma_prd { #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) -#define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) #define ata_id_has_ncq_autosense(id) \ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) From f5a6fa1877f409985cf3a30e347205bec43c7880 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 16 Sep 2022 14:28:34 +0200 Subject: [PATCH 0225/1477] ata: fix ata_id_has_ncq_autosense() [ Upstream commit a5fb6bf853148974dbde092ec1bde553bea5e49f ] ACS-5 section 7.13.6.36 Word 78: Serial ATA features supported states that: If word 76 is not 0000h or FFFFh, word 78 reports the features supported by the device. If this word is not supported, the word shall be cleared to zero. (This text also exists in really old ACS standards, e.g. ACS-3.) Additionally, move the macro to the other ATA_ID_FEATURE_SUPP macros (which already have this check), thus making it more likely that the next ATA_ID_FEATURE_SUPP macro that is added will include this check. 
Fixes: 5b01e4b9efa0 ("libata: Implement NCQ autosense") Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- include/linux/ata.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/ata.h b/include/linux/ata.h index 8b884cd3a232..94f7872da983 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -569,6 +569,10 @@ struct ata_bmdma_prd { ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))) +#define ata_id_has_ncq_autosense(id) \ + ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ + ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7))) #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10)) #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11)) #define ata_id_u32(id,n) \ @@ -581,8 +585,6 @@ struct ata_bmdma_prd { #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) -#define ata_id_has_ncq_autosense(id) \ - ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) static inline bool ata_id_has_hipm(const u16 *id) { From f5325f3202b878c7352c9940d55f4b667b3c9eef Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 16 Sep 2022 14:28:35 +0200 Subject: [PATCH 0226/1477] ata: fix ata_id_has_dipm() [ Upstream commit 630624cb1b5826d753ac8e01a0e42de43d66dedf ] ACS-5 section 7.13.6.36 Word 78: Serial ATA features supported states that: If word 76 is not 0000h or FFFFh, word 78 reports the features supported by the device. If this word is not supported, the word shall be cleared to zero. (This text also exists in really old ACS standards, e.g. ACS-3.) The problem with ata_id_has_dipm() is that, while it performs a check against 0 and 0xffff, it performs the check against ATA_ID_FEATURE_SUPP (word 78), the same word where the feature bit is stored.
Fix this by performing the check against ATA_ID_SATA_CAPABILITY (word 76), like required by the spec. The feature bit check itself is of course still performed against ATA_ID_FEATURE_SUPP (word 78). Additionally, move the macro to the other ATA_ID_FEATURE_SUPP macros (which already have this check), thus making it more likely that the next ATA_ID_FEATURE_SUPP macro that is added will include this check. Fixes: ca77329fb713 ("[libata] Link power management infrastructure") Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- include/linux/ata.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/include/linux/ata.h b/include/linux/ata.h index 94f7872da983..6d2d31b03b4d 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -573,6 +573,10 @@ struct ata_bmdma_prd { ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7))) +#define ata_id_has_dipm(id) \ + ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ + ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 3))) #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10)) #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11)) #define ata_id_u32(id,n) \ @@ -596,17 +600,6 @@ static inline bool ata_id_has_hipm(const u16 *id) return val & (1 << 9); } -static inline bool ata_id_has_dipm(const u16 *id) -{ - u16 val = id[ATA_ID_FEATURE_SUPP]; - - if (val == 0 || val == 0xffff) - return false; - - return val & (1 << 3); -} - - static inline bool ata_id_has_fua(const u16 *id) { if ((id[ATA_ID_CFSSE] & 0xC000) != 0x4000) From 7bd5f3b4a805036892da047010b43d067603b139 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Jul 2022 10:12:12 +0300 Subject: [PATCH 0227/1477] mtd: rawnand: meson: fix bit map use in meson_nfc_ecc_correct() [ Upstream commit 3e4ad3212cf22687410b1e8f4e68feec50646113 ] The meson_nfc_ecc_correct() 
function accidentally does a right shift instead of a left shift so it only works for BIT(0). Also use BIT_ULL() because "correct_bitmap" is a u64 and we want to avoid shift wrapping bugs. Fixes: 8fae856c5350 ("mtd: rawnand: meson: add support for Amlogic NAND flash controller") Signed-off-by: Dan Carpenter Acked-by: Liang Yang Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/YuI2zF1hP65+LE7r@kili Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/meson_nand.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 327a2257ec26..38f490088d76 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -454,7 +454,7 @@ static int meson_nfc_ecc_correct(struct nand_chip *nand, u32 *bitflips, if (ECC_ERR_CNT(*info) != ECC_UNCORRECTABLE) { mtd->ecc_stats.corrected += ECC_ERR_CNT(*info); *bitflips = max_t(u32, *bitflips, ECC_ERR_CNT(*info)); - *correct_bitmap |= 1 >> i; + *correct_bitmap |= BIT_ULL(i); continue; } if ((nand->options & NAND_NEED_SCRAMBLING) && @@ -800,7 +800,7 @@ static int meson_nfc_read_page_hwecc(struct nand_chip *nand, u8 *buf, u8 *data = buf + i * ecc->size; u8 *oob = nand->oob_poi + i * (ecc->bytes + 2); - if (correct_bitmap & (1 << i)) + if (correct_bitmap & BIT_ULL(i)) continue; ret = nand_check_erased_ecc_chunk(data, ecc->size, oob, ecc->bytes + 2, From 76694e9ce0b2238c0a5f3ba54f9361dd3770ec78 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Tue, 23 Aug 2022 11:51:04 -0700 Subject: [PATCH 0228/1477] md: Replace snprintf with scnprintf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1727fd5015d8f93474148f94e34cda5aa6ad4a43 ] Current code produces a warning as shown below when total characters in the constituent block device names plus the slashes exceeds 200. 
snprintf() returns the number of characters generated from the given input, which could cause the expression “200 – len” to wrap around to a large positive number. Fix this by using scnprintf() instead, which returns the actual number of characters written into the buffer. [ 1513.267938] ------------[ cut here ]------------ [ 1513.267943] WARNING: CPU: 15 PID: 37247 at /lib/vsprintf.c:2509 vsnprintf+0x2c8/0x510 [ 1513.267944] Modules linked in: [ 1513.267969] CPU: 15 PID: 37247 Comm: mdadm Not tainted 5.4.0-1085-azure #90~18.04.1-Ubuntu [ 1513.267969] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 05/09/2022 [ 1513.267971] RIP: 0010:vsnprintf+0x2c8/0x510 <-snip-> [ 1513.267982] Call Trace: [ 1513.267986] snprintf+0x45/0x70 [ 1513.267990] ? disk_name+0x71/0xa0 [ 1513.267993] dump_zones+0x114/0x240 [raid0] [ 1513.267996] ? _cond_resched+0x19/0x40 [ 1513.267998] raid0_run+0x19e/0x270 [raid0] [ 1513.268000] md_run+0x5e0/0xc50 [ 1513.268003] ? security_capable+0x3f/0x60 [ 1513.268005] do_md_run+0x19/0x110 [ 1513.268006] md_ioctl+0x195e/0x1f90 [ 1513.268007] blkdev_ioctl+0x91f/0x9f0 [ 1513.268010] block_ioctl+0x3d/0x50 [ 1513.268012] do_vfs_ioctl+0xa9/0x640 [ 1513.268014] ? 
__fput+0x162/0x260 [ 1513.268016] ksys_ioctl+0x75/0x80 [ 1513.268017] __x64_sys_ioctl+0x1a/0x20 [ 1513.268019] do_syscall_64+0x5e/0x200 [ 1513.268021] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 766038846e875 ("md/raid0: replace printk() with pr_*()") Reviewed-by: Michael Kelley Acked-by: Guoqing Jiang Signed-off-by: Saurabh Sengar Signed-off-by: Song Liu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index a4c0cafa6010..a20332e755e8 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -48,7 +48,7 @@ static void dump_zones(struct mddev *mddev) int len = 0; for (k = 0; k < conf->strip_zone[j].nb_dev; k++) - len += snprintf(line+len, 200-len, "%s%s", k?"/":"", + len += scnprintf(line+len, 200-len, "%s%s", k?"/":"", bdevname(conf->devlist[j*raid_disks + k]->bdev, b)); pr_debug("md: zone%d=[%s]\n", j, line); From a1263294b55c948842a2c058a47fb330223b0f6e Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 25 Aug 2022 09:46:27 -0600 Subject: [PATCH 0229/1477] md/raid5: Ensure stripe_fill happens on non-read IO with journal [ Upstream commit e2eed85bc75138a9eeb63863d20f8904ac42a577 ] When doing degrade/recover tests using the journal a kernel BUG is hit at drivers/md/raid5.c:4381 in handle_parity_checks5(): BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); This was found to occur because handle_stripe_fill() was skipped for stripes in the journal due to a condition in that function. Thus blocks were not fetched and R5_UPTODATE was not set when the code reached handle_parity_checks5(). To fix this, don't skip handle_stripe_fill() unless the stripe is for read. 
Fixes: 07e83364845e ("md/r5cache: shift complex rmw from read path to write path") Link: https://lore.kernel.org/linux-raid/e05c4239-41a9-d2f7-3cfa-4aa9d2cea8c1@deltatee.com/ Suggested-by: Song Liu Signed-off-by: Logan Gunthorpe Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 01c7edf32936..00995e60d46b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3936,7 +3936,7 @@ static void handle_stripe_fill(struct stripe_head *sh, * back cache (prexor with orig_page, and then xor with * page) in the read path */ - if (s->injournal && s->failed) { + if (s->to_read && s->injournal && s->failed) { if (test_bit(STRIPE_R5C_CACHING, &sh->state)) r5c_make_stripe_write_out(sh); goto out; From 360386e11c8d2e3b7e34749b5ee206c04282806c Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 8 Sep 2022 13:09:02 +0300 Subject: [PATCH 0230/1477] RDMA/cm: Use SLID in the work completion as the DLID in responder side [ Upstream commit b7d95040c13f61a4a6a859c5355faf583eff9658 ] The responder should always use WC's SLID as the dlid, to follow the IB SPEC section "13.5.4.2 COMMON RESPONSE ACTIONS": A responder always takes the following actions in constructing a response packet: - The SLID of the received packet is used as the DLID in the response packet. 
Fixes: ac3a949fb2ff ("IB/CM: Set appropriate slid and dlid when handling CM request") Signed-off-by: Mark Zhang Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/cd17c240231e059d2fc07c17dfe555d548b917eb.1662631201.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/cm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 3cc7a23fa69f..3133b6be6cab 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1643,14 +1643,13 @@ static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num, static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, - struct sa_path_rec *alt_path) + struct sa_path_rec *alt_path, + struct ib_wc *wc) { u32 lid; if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(primary_path, - IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, - req_msg)); + sa_path_set_dlid(primary_path, wc->slid); sa_path_set_slid(primary_path, IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg)); @@ -1687,7 +1686,8 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, - struct sa_path_rec *alt_path) + struct sa_path_rec *alt_path, + struct ib_wc *wc) { primary_path->dgid = *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg); @@ -1745,7 +1745,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, if (sa_path_is_roce(alt_path)) alt_path->roce.route_resolved = false; } - cm_format_path_lid_from_req(req_msg, primary_path, alt_path); + cm_format_path_lid_from_req(req_msg, primary_path, alt_path, wc); } static u16 cm_get_bth_pkey(struct cm_work *work) @@ -2163,7 +2163,7 @@ static int cm_req_handler(struct cm_work *work) if (cm_req_has_alt_path(req_msg)) work->path[1].rec_type = work->path[0].rec_type; 
cm_format_paths_from_req(req_msg, &work->path[0], - &work->path[1]); + &work->path[1], work->mad_recv_wc->wc); if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); From dac769dd7dc812bc001e14fa2733b62f1c555155 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Wed, 21 Sep 2022 17:08:43 +0900 Subject: [PATCH 0231/1477] IB: Set IOVA/LENGTH on IB_MR in core/uverbs layers [ Upstream commit 241f9a27e0fc0eaf23e3d52c8450f10648cd11f1 ] Set 'iova' and 'length' on ib_mr in ib_uverbs and ib_core layers to let all drivers have the members filled. Also, this commit removes redundancy in the respective drivers. Previously, commit 04c0a5fcfcf65 ("IB/uverbs: Set IOVA on IB MR in uverbs layer") changed to set 'iova', but seems to have missed 'length' and the ib_core layer at that time. Fixes: 04c0a5fcfcf65 ("IB/uverbs: Set IOVA on IB MR in uverbs layer") Signed-off-by: Daisuke Matsuda Link: https://lore.kernel.org/r/20220921080844.1616883-1-matsuda-daisuke@fujitsu.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs_cmd.c | 5 ++++- drivers/infiniband/core/verbs.c | 2 ++ drivers/infiniband/hw/hns/hns_roce_mr.c | 1 - drivers/infiniband/hw/mlx4/mr.c | 1 - 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 466026825dd7..d7c90da9ce7f 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -749,6 +749,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->iova = cmd.hca_va; + mr->length = cmd.length; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_set_name(&mr->res, NULL); @@ -832,8 +833,10 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) atomic_dec(&old_pd->usecnt); } - if (cmd.flags & IB_MR_REREG_TRANS) + if 
(cmd.flags & IB_MR_REREG_TRANS) { mr->iova = cmd.hca_va; + mr->length = cmd.length; + } memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 597e889ba831..5889639e90a1 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2082,6 +2082,8 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->pd = pd; mr->dm = NULL; atomic_inc(&pd->usecnt); + mr->iova = virt_addr; + mr->length = length; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_parent_name(&mr->res, &pd->res); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 027ec8413ac2..6d7cc724862f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -286,7 +286,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_alloc_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->ibmr.length = length; return &mr->ibmr; diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 426fed005d53..811b4bb34524 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.length = length; mr->ibmr.page_size = 1U << shift; return &mr->ibmr; From cc952e3bf61cf83e51dab80aba656da22dbc6d00 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 21 Sep 2022 15:34:47 +0300 Subject: [PATCH 0232/1477] xhci: Don't show warning for reinit on known broken suspend [ Upstream commit 484d6f7aa3283d082c87654b7fe7a7f725423dfb ] commit 8b328f8002bc ("xhci: re-initialize the HC during resume if HCE was set") introduced a new warning message when the host controller error was set and re-initializing. 
This is expected behavior on some designs which already set `xhci->broken_suspend` so the new warning is alarming to some users. Modify the code to only show the warning if this was a surprising behavior to the XHCI driver. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216470 Fixes: 8b328f8002bc ("xhci: re-initialize the HC during resume if HCE was set") Reported-by: Artem S. Tashkinov Signed-off-by: Mario Limonciello Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220921123450.671459-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 7b16b6b45af7..8918e6ae5c4b 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1163,7 +1163,8 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) /* re-initialize the HC on Restore Error, or Host Controller Error */ if (temp & (STS_SRE | STS_HCE)) { reinit_xhc = true; - xhci_warn(xhci, "xHC error in resume, USBSTS 0x%x, Reinit\n", temp); + if (!xhci->broken_suspend) + xhci_warn(xhci, "xHC error in resume, USBSTS 0x%x, Reinit\n", temp); } if (reinit_xhc) { From 7efdd91d54cbf42a8079ebbdc677a16984b2f9b7 Mon Sep 17 00:00:00 2001 From: Albert Briscoe Date: Sun, 11 Sep 2022 15:37:55 -0700 Subject: [PATCH 0233/1477] usb: gadget: function: fix dangling pnp_string in f_printer.c [ Upstream commit 24b7ba2f88e04800b54d462f376512e8c41b8a3c ] When opts->pnp_string is changed with configfs, new memory is allocated for the string. It does not, however, update dev->pnp_string, even though the memory is freed. When requesting the string, the host then gets old or corrupted data rather than the new string. The IEEE 1284 id string should be allowed to change while the device is connected.
The bug was introduced in commit fdc01cc286be ("usb: gadget: printer: Remove pnp_string static buffer"), which changed opts->pnp_string from a char[] to a char*. This patch changes dev->pnp_string from a char* to a char** pointing to opts->pnp_string. Fixes: fdc01cc286be ("usb: gadget: printer: Remove pnp_string static buffer") Signed-off-by: Albert Briscoe Link: https://lore.kernel.org/r/20220911223753.20417-1-albertsbriscoe@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_printer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index 236ecc968998..c13bb29a160e 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -87,7 +87,7 @@ struct printer_dev { u8 printer_cdev_open; wait_queue_head_t wait; unsigned q_len; - char *pnp_string; /* We don't own memory! */ + char **pnp_string; /* We don't own memory! 
*/ struct usb_function function; }; @@ -999,16 +999,16 @@ static int printer_func_setup(struct usb_function *f, if ((wIndex>>8) != dev->interface) break; - if (!dev->pnp_string) { + if (!*dev->pnp_string) { value = 0; break; } - value = strlen(dev->pnp_string); + value = strlen(*dev->pnp_string); buf[0] = (value >> 8) & 0xFF; buf[1] = value & 0xFF; - memcpy(buf + 2, dev->pnp_string, value); + memcpy(buf + 2, *dev->pnp_string, value); DBG(dev, "1284 PNP String: %x %s\n", value, - dev->pnp_string); + *dev->pnp_string); break; case GET_PORT_STATUS: /* Get Port Status */ @@ -1471,7 +1471,7 @@ static struct usb_function *gprinter_alloc(struct usb_function_instance *fi) kref_init(&dev->kref); ++opts->refcnt; dev->minor = opts->minor; - dev->pnp_string = opts->pnp_string; + dev->pnp_string = &opts->pnp_string; dev->q_len = opts->q_len; mutex_unlock(&opts->lock); From 71ffe5111f0ffa2fd43c14fd176c6f05d4e82212 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 22 Sep 2022 14:22:47 +0300 Subject: [PATCH 0234/1477] drivers: serial: jsm: fix some leaks in probe [ Upstream commit 1d5859ef229e381f4db38dce8ed58e4bf862006b ] This error path needs to unwind instead of just returning directly. 
Fixes: 03a8482c17dd ("drivers: serial: jsm: Enable support for Digi Classic adapters") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YyxFh1+lOeZ9WfKO@kili Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/jsm/jsm_driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/jsm/jsm_driver.c b/drivers/tty/serial/jsm/jsm_driver.c index cd30da0ef083..b5b61e598b53 100644 --- a/drivers/tty/serial/jsm/jsm_driver.c +++ b/drivers/tty/serial/jsm/jsm_driver.c @@ -212,7 +212,8 @@ static int jsm_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent) break; default: - return -ENXIO; + rc = -ENXIO; + goto out_kfree_brd; } rc = request_irq(brd->irq, brd->bd_ops->intr, IRQF_SHARED, "JSM", brd); From 3fbfa5e3cc0dcc11345a4eb3da1ee4e6e6a61efe Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 28 Apr 2021 09:30:52 +0200 Subject: [PATCH 0235/1477] serial: 8250: Add an empty line and remove some useless {} [ Upstream commit 26f7591632d74f637f346f5d642d8ebe6b433fc9 ] This fixes the following checkpatch.pl warnings: WARNING: Missing a blank line after declarations WARNING: braces {} are not necessary for any arm of this statement Reviewed-by: Andy Shevchenko Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/257ffd691b4a062ad017333c9430d69da6dbd29a.1619594713.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 039d4926379b ("serial: 8250: Toggle IER bits on only after irq has been set up") Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 98ce484f1089..aae9d26ce4f4 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -332,9 +332,9 @@ static int univ8250_setup_irq(struct uart_8250_port *up) * hardware interrupt, we use a timer-based 
system. The original * driver used to do this with IRQ0. */ - if (!port->irq) { + if (!port->irq) mod_timer(&up->timer, jiffies + uart_poll_timeout(port)); - } else + else retval = serial_link_irq_chain(up); return retval; @@ -766,6 +766,7 @@ void serial8250_suspend_port(int line) if (!console_suspend_enabled && uart_console(port) && port->type != PORT_8250) { unsigned char canary = 0xa5; + serial_out(up, UART_SCR, canary); if (serial_in(up, UART_SCR) == canary) up->canary = canary; From 572fb97fce35a3cc5694b9c8a7e83c97d11461b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 22 Sep 2022 10:00:05 +0300 Subject: [PATCH 0236/1477] serial: 8250: Toggle IER bits on only after irq has been set up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 039d4926379b1d1c17b51cf21c500a5eed86899e ] Invoking TIOCVHANGUP on 8250_mid port on Ice Lake-D and then reopening the port triggers these faults during serial8250_do_startup(): DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Write NO_PASID] Request device [00:1a.0] fault addr 0x0 [fault reason 0x05] PTE Write access is not set If the IRQ hasn't been set up yet, the UART will have zeroes in its MSI address/data registers. Disabling the IRQ at the interrupt controller won't stop the UART from performing a DMA write to the address programmed in its MSI address register (zero) when it wants to signal an interrupt. The UARTs (in Ice Lake-D) implement PCI 2.1 style MSI without masking capability, so there is no way to mask the interrupt at the source PCI function level, except disabling the MSI capability entirely, but that would cause it to fall back to INTx# assertion, and the PCI specification prohibits disabling the MSI capability as a way to mask a function's interrupt service request. The MSI address register is zeroed by the hangup as the irq is freed. 
The interrupt is signalled during serial8250_do_startup() performing a THRE test that temporarily toggles THRI in IER. The THRE test currently occurs before UART's irq (and MSI address) is properly set up. Refactor serial8250_do_startup() such that irq is set up before the THRE test. The current irq setup code is intermixed with the timer setup code. As THRE test must be performed prior to the timer setup, extract it into own function and call it only after the THRE test. The ->setup_timer() needs to be part of the struct uart_8250_ops in order to not create circular dependency between 8250 and 8250_base modules. Fixes: 40b36daad0ac ("[PATCH] 8250 UART backup timer") Reported-by: Lennert Buytenhek Tested-by: Lennert Buytenhek Reviewed-by: Andy Shevchenko Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220922070005.2965-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_core.c | 16 +++++++++++----- drivers/tty/serial/8250/8250_port.c | 8 +++++--- include/linux/serial_8250.h | 1 + 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index aae9d26ce4f4..0a7e9491b4d1 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -310,10 +310,9 @@ static void serial8250_backup_timeout(struct timer_list *t) jiffies + uart_poll_timeout(&up->port) + HZ / 5); } -static int univ8250_setup_irq(struct uart_8250_port *up) +static void univ8250_setup_timer(struct uart_8250_port *up) { struct uart_port *port = &up->port; - int retval = 0; /* * The above check will only give an accurate result the first time @@ -334,10 +333,16 @@ static int univ8250_setup_irq(struct uart_8250_port *up) */ if (!port->irq) mod_timer(&up->timer, jiffies + uart_poll_timeout(port)); - else - retval = serial_link_irq_chain(up); +} - return retval; +static int univ8250_setup_irq(struct uart_8250_port 
*up) +{ + struct uart_port *port = &up->port; + + if (port->irq) + return serial_link_irq_chain(up); + + return 0; } static void univ8250_release_irq(struct uart_8250_port *up) @@ -393,6 +398,7 @@ static struct uart_ops univ8250_port_ops; static const struct uart_8250_ops univ8250_driver_ops = { .setup_irq = univ8250_setup_irq, .release_irq = univ8250_release_irq, + .setup_timer = univ8250_setup_timer, }; static struct uart_8250_port serial8250_ports[UART_NR]; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 9d60418e4adb..3caa4aa193f6 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2276,6 +2276,10 @@ int serial8250_do_startup(struct uart_port *port) if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) up->port.irqflags |= IRQF_SHARED; + retval = up->ops->setup_irq(up); + if (retval) + goto out; + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; @@ -2318,9 +2322,7 @@ int serial8250_do_startup(struct uart_port *port) } } - retval = up->ops->setup_irq(up); - if (retval) - goto out; + up->ops->setup_timer(up); /* * Now, initialize the UART diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 2b70f736b091..92f3b778d8c2 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -74,6 +74,7 @@ struct uart_8250_port; struct uart_8250_ops { int (*setup_irq)(struct uart_8250_port *); void (*release_irq)(struct uart_8250_port *); + void (*setup_timer)(struct uart_8250_port *); }; struct uart_8250_em485 { From 9a56ade124d4891a31ab1300c57665f07f5b24d5 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Tue, 20 Sep 2022 19:17:03 +0800 Subject: [PATCH 0237/1477] tty: serial: fsl_lpuart: disable dma rx/tx use flags in lpuart_dma_shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 316ae95c175a7d770d1bfe4c011192712f57aa4a ] lpuart_dma_shutdown tears down lpuart dma, 
but lpuart_flush_buffer can still occur which in turn tries to access dma apis if lpuart_dma_tx_use flag is true. At this point since dma is torn down, these dma apis can abort. Set lpuart_dma_tx_use and the corresponding rx flag lpuart_dma_rx_use to false in lpuart_dma_shutdown so that dmas are not accessed after they are relinquished. Otherwise, when try to kill btattach, kernel may panic. This patch may fix this issue. root@imx8ulpevk:~# btattach -B /dev/ttyLP2 -S 115200 ^C[ 90.182296] Internal error: synchronous external abort: 96000210 [#1] PREEMPT SMP [ 90.189806] Modules linked in: moal(O) mlan(O) [ 90.194258] CPU: 0 PID: 503 Comm: btattach Tainted: G O 5.15.32-06136-g34eecdf2f9e4 #37 [ 90.203554] Hardware name: NXP i.MX8ULP 9X9 EVK (DT) [ 90.208513] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 90.215470] pc : fsl_edma3_disable_request+0x8/0x60 [ 90.220358] lr : fsl_edma3_terminate_all+0x34/0x20c [ 90.225237] sp : ffff800013f0bac0 [ 90.228548] x29: ffff800013f0bac0 x28: 0000000000000001 x27: ffff000008404800 [ 90.235681] x26: ffff000008404960 x25: ffff000008404a08 x24: ffff000008404a00 [ 90.242813] x23: ffff000008404a60 x22: 0000000000000002 x21: 0000000000000000 [ 90.249946] x20: ffff800013f0baf8 x19: ffff00000559c800 x18: 0000000000000000 [ 90.257078] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [ 90.264211] x14: 0000000000000003 x13: 0000000000000000 x12: 0000000000000040 [ 90.271344] x11: ffff00000600c248 x10: ffff800013f0bb10 x9 : ffff000057bcb090 [ 90.278477] x8 : fffffc0000241a08 x7 : ffff00000534ee00 x6 : ffff000008404804 [ 90.285609] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff0000055b3480 [ 90.292742] x2 : ffff8000135c0000 x1 : ffff00000534ee00 x0 : ffff00000559c800 [ 90.299876] Call trace: [ 90.302321] fsl_edma3_disable_request+0x8/0x60 [ 90.306851] lpuart_flush_buffer+0x40/0x160 [ 90.311037] uart_flush_buffer+0x88/0x120 [ 90.315050] tty_driver_flush_buffer+0x20/0x30 [ 90.319496] 
hci_uart_flush+0x44/0x90 [ 90.323162] hci_uart_tty_close+0x34/0x12c [ 90.327253] tty_ldisc_close+0x38/0x70 [ 90.331005] tty_ldisc_release+0xa8/0x190 [ 90.335018] tty_release_struct+0x24/0x8c [ 90.339022] tty_release+0x3ec/0x4c0 [ 90.342593] __fput+0x70/0x234 [ 90.345652] ____fput+0x14/0x20 [ 90.348790] task_work_run+0x84/0x17c [ 90.352455] do_exit+0x310/0x96c [ 90.355688] do_group_exit+0x3c/0xa0 [ 90.359259] __arm64_sys_exit_group+0x1c/0x20 [ 90.363609] invoke_syscall+0x48/0x114 [ 90.367362] el0_svc_common.constprop.0+0xd4/0xfc [ 90.372068] do_el0_svc+0x2c/0x94 [ 90.375379] el0_svc+0x28/0x80 [ 90.378438] el0t_64_sync_handler+0xa8/0x130 [ 90.382711] el0t_64_sync+0x1a0/0x1a4 [ 90.386376] Code: 17ffffda d503201f d503233f f9409802 (b9400041) [ 90.392467] ---[ end trace 2f60524b4a43f1f6 ]--- [ 90.397073] note: btattach[503] exited with preempt_count 1 [ 90.402636] Fixing recursive fault but reboot is needed! Fixes: 6250cc30c4c4 ("tty: serial: fsl_lpuart: Use scatter/gather DMA for Tx") Reviewed-by: Ilpo Järvinen Signed-off-by: Thara Gopinath Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20220920111703.1532-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/fsl_lpuart.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index a2c4eab0b470..269d1e3a025d 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1725,6 +1725,7 @@ static void lpuart_dma_shutdown(struct lpuart_port *sport) if (sport->lpuart_dma_rx_use) { del_timer_sync(&sport->lpuart_timer); lpuart_dma_rx_free(&sport->port); + sport->lpuart_dma_rx_use = false; } if (sport->lpuart_dma_tx_use) { @@ -1733,6 +1734,7 @@ static void lpuart_dma_shutdown(struct lpuart_port *sport) sport->dma_tx_in_progress = false; dmaengine_terminate_all(sport->dma_tx_chan); } + sport->lpuart_dma_tx_use = false; } if (sport->dma_tx_chan) From 878f9871668f4b46eea1e3644b8bce1fcdfc8b10 Mon Sep
17 00:00:00 2001 From: Dongliang Mu Date: Wed, 14 Sep 2022 13:13:33 +0800 Subject: [PATCH 0238/1477] phy: qualcomm: call clk_disable_unprepare in the error handling [ Upstream commit c3966ced8eb8dc53b6c8d7f97d32cc8a2107d83e ] Smatch reports the following error: drivers/phy/qualcomm/phy-qcom-usb-hsic.c:82 qcom_usb_hsic_phy_power_on() warn: 'uphy->cal_clk' from clk_prepare_enable() not released on lines: 58. drivers/phy/qualcomm/phy-qcom-usb-hsic.c:82 qcom_usb_hsic_phy_power_on() warn: 'uphy->cal_sleep_clk' from clk_prepare_enable() not released on lines: 58. drivers/phy/qualcomm/phy-qcom-usb-hsic.c:82 qcom_usb_hsic_phy_power_on() warn: 'uphy->phy_clk' from clk_prepare_enable() not released on lines: 58. Fix this by calling proper clk_disable_unprepare calls. Fixes: 0b56e9a7e835 ("phy: Group vendor specific phy drivers") Signed-off-by: Dongliang Mu Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20220914051334.69282-1-dzm91@hust.edu.cn Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/qualcomm/phy-qcom-usb-hsic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-usb-hsic.c b/drivers/phy/qualcomm/phy-qcom-usb-hsic.c index 04d18d52f700..d4741c2dbbb5 100644 --- a/drivers/phy/qualcomm/phy-qcom-usb-hsic.c +++ b/drivers/phy/qualcomm/phy-qcom-usb-hsic.c @@ -54,8 +54,10 @@ static int qcom_usb_hsic_phy_power_on(struct phy *phy) /* Configure pins for HSIC functionality */ pins_default = pinctrl_lookup_state(uphy->pctl, PINCTRL_STATE_DEFAULT); - if (IS_ERR(pins_default)) - return PTR_ERR(pins_default); + if (IS_ERR(pins_default)) { + ret = PTR_ERR(pins_default); + goto err_ulpi; + } ret = pinctrl_select_state(uphy->pctl, pins_default); if (ret) From 95ac62e8545be2b0a8cae0beef7c682e2e470e48 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 12 Sep 2022 19:04:31 +0200 Subject: [PATCH 0239/1477] staging: vt6655: fix some erroneous memory clean-up loops [ Upstream commit 
2a2db520e3ca5aafba7c211abfd397666c9b5f9d ] In some initialization functions of this driver, memory is allocated with 'i' acting as an index variable and increasing from 0. The commit in "Fixes" introduces some clean-up codes in case of allocation failure, which free memory in reverse order with 'i' decreasing to 0. However, there are some problems: - The case i=0 is left out. Thus memory is leaked. - In case memory allocation fails right from the start, the memory freeing loops will start with i=-1 and invalid memory locations will be accessed. One of these loops has been fixed in commit c8ff91535880 ("staging: vt6655: fix potential memory leak"). Fix the remaining erroneous loops. Link: https://lore.kernel.org/linux-staging/Yx9H1zSpxmNqx6Xc@kadam/ Fixes: 5341ee0adb17 ("staging: vt6655: check for memory allocation failures") Reported-by: Dan Carpenter Tested-by: Philipp Hortmann Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/20220912170429.29852-1-namcaov@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/vt6655/device_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 09ab6d6f2429..0dd70173a754 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -564,7 +564,7 @@ err_free_rd: kfree(desc->rd_info); err_free_desc: - while (--i) { + while (i--) { desc = &priv->aRD0Ring[i]; device_free_rx_buf(priv, desc); kfree(desc->rd_info); @@ -610,7 +610,7 @@ err_free_rd: kfree(desc->rd_info); err_free_desc: - while (--i) { + while (i--) { desc = &priv->aRD1Ring[i]; device_free_rx_buf(priv, desc); kfree(desc->rd_info); @@ -715,7 +715,7 @@ static int device_init_td1_ring(struct vnt_private *priv) return 0; err_free_desc: - while (--i) { + while (i--) { desc = &priv->apTD1Rings[i]; kfree(desc->td_info); } From ab5a3e714437bbd944cf1d6d1cec819c3ed826aa Mon Sep 17 00:00:00 2001 From: 
"Guilherme G. Piccoli" Date: Fri, 9 Sep 2022 17:07:55 -0300 Subject: [PATCH 0240/1477] firmware: google: Test spinlock on panic path to avoid lockups [ Upstream commit 3e081438b8e639cc76ef1a5ce0c1bd8a154082c7 ] Currently the gsmi driver registers a panic notifier as well as reboot and die notifiers. The callbacks registered are called in atomic and very limited context - for instance, panic disables preemption and local IRQs, also all secondary CPUs (not executing the panic path) are shutdown. With that said, taking a spinlock in this scenario is a dangerous invitation for lockup scenarios. So, fix that by checking if the spinlock is free to acquire in the panic notifier callback - if not, bail-out and avoid a potential hang. Fixes: 74c5b31c6618 ("driver: Google EFI SMI") Cc: Andrew Morton Cc: Ard Biesheuvel Cc: David Gow Cc: Greg Kroah-Hartman Cc: Julius Werner Cc: Petr Mladek Reviewed-by: Evan Green Signed-off-by: Guilherme G. Piccoli Link: https://lore.kernel.org/r/20220909200755.189679-1-gpiccoli@igalia.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/firmware/google/gsmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c index 7d9367b22010..c1cd5ca875ca 100644 --- a/drivers/firmware/google/gsmi.c +++ b/drivers/firmware/google/gsmi.c @@ -680,6 +680,15 @@ static struct notifier_block gsmi_die_notifier = { static int gsmi_panic_callback(struct notifier_block *nb, unsigned long reason, void *arg) { + + /* + * Panic callbacks are executed with all other CPUs stopped, + * so we must not attempt to spin waiting for gsmi_dev.lock + * to be released. 
+ */ + if (spin_is_locked(&gsmi_dev.lock)) + return NOTIFY_DONE; + gsmi_shutdown_reason(GSMI_SHUTDOWN_PANIC); return NOTIFY_DONE; } From 8f740c11d89157bd74849ae388288075e05e2a34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 24 Sep 2022 12:43:24 +0200 Subject: [PATCH 0241/1477] serial: 8250: Fix restoring termios speed after suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 379a33786d489ab81885ff0b3935cfeb36137fea ] Since commit edc6afc54968 ("tty: switch to ktermios and new framework") termios speed is no longer stored only in c_cflag member but also in new additional c_ispeed and c_ospeed members. If BOTHER flag is set in c_cflag then termios speed is stored only in these new members. Since commit 027b57170bf8 ("serial: core: Fix initializing and restoring termios speed") termios speed is available also in struct console. So properly restore also c_ispeed and c_ospeed members after suspend to fix restoring termios speed which is not represented by Bnnn constant. 
Fixes: 4516d50aabed ("serial: 8250: Use canary to restart console after suspend") Signed-off-by: Pali Rohár Link: https://lore.kernel.org/r/20220924104324.4035-1-pali@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_port.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 3caa4aa193f6..71d143c00248 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -3288,8 +3288,13 @@ static void serial8250_console_restore(struct uart_8250_port *up) unsigned int baud, quot, frac = 0; termios.c_cflag = port->cons->cflag; - if (port->state->port.tty && termios.c_cflag == 0) + termios.c_ispeed = port->cons->ispeed; + termios.c_ospeed = port->cons->ospeed; + if (port->state->port.tty && termios.c_cflag == 0) { termios.c_cflag = port->state->port.tty->termios.c_cflag; + termios.c_ispeed = port->state->port.tty->termios.c_ispeed; + termios.c_ospeed = port->state->port.tty->termios.c_ospeed; + } baud = serial8250_get_baud_rate(port, &termios, NULL); quot = serial8250_get_divisor(port, baud, &frac); From a9e5176ead6de64f572ad5c87a72825d9d3c82ae Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 20 Sep 2022 22:42:13 +0800 Subject: [PATCH 0242/1477] scsi: libsas: Fix use-after-free bug in smp_execute_task_sg() [ Upstream commit 46ba53c30666717cb06c2b3c5d896301cd00d0c0 ] When executing SMP task failed, the smp_execute_task_sg() calls del_timer() to delete "slow_task->timer". However, if the timer handler sas_task_internal_timedout() is running, the del_timer() in smp_execute_task_sg() will not stop it and a UAF will happen. The process is shown below: (thread 1) | (thread 2) smp_execute_task_sg() | sas_task_internal_timedout() ... | del_timer() | ... | ... sas_free_task(task) | kfree(task->slow_task) //FREE| | task->slow_task->... 
//USE Fix by calling del_timer_sync() in smp_execute_task_sg(), which makes sure the timer handler has finished before the "task->slow_task" is deallocated. Link: https://lore.kernel.org/r/20220920144213.10536-1-duoming@zju.edu.cn Fixes: 2908d778ab3e ("[SCSI] aic94xx: new driver") Reviewed-by: Jason Yan Signed-off-by: Duoming Zhou Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libsas/sas_expander.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 8d6bcc19359f..51485d0251f2 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -85,7 +85,7 @@ static int smp_execute_task_sg(struct domain_device *dev, res = i->dft->lldd_execute_task(task, GFP_KERNEL); if (res) { - del_timer(&task->slow_task->timer); + del_timer_sync(&task->slow_task->timer); pr_notice("executing SMP task failed:%d\n", res); break; } From 884a788f065578bb640382279a83d1df433b13e6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 7 Sep 2022 17:17:00 -0500 Subject: [PATCH 0243/1477] scsi: iscsi: iscsi_tcp: Fix null-ptr-deref while calling getpeername() [ Upstream commit 57569c37f0add1b6489e1a1563c71519daf732cf ] Fix a NULL pointer crash that occurs when we are freeing the socket at the same time we access it via sysfs. The problem is that: 1. iscsi_sw_tcp_conn_get_param() and iscsi_sw_tcp_host_get_param() take the frwd_lock and do sock_hold() then drop the frwd_lock. sock_hold() does a get on the "struct sock". 2. iscsi_sw_tcp_release_conn() does sockfd_put() which does the last put on the "struct socket" and that does __sock_release() which sets the sock->ops to NULL. 3. iscsi_sw_tcp_conn_get_param() and iscsi_sw_tcp_host_get_param() then call kernel_getpeername() which accesses the NULL sock->ops. Above we do a get on the "struct sock", but we needed a get on the "struct socket".
Originally, we just held the frwd_lock the entire time but in commit bcf3a2953d36 ("scsi: iscsi: iscsi_tcp: Avoid holding spinlock while calling getpeername()") we switched to refcount based because the network layer changed and started taking a mutex in that path, so we could no longer hold the frwd_lock. Instead of trying to maintain multiple refcounts, this just has us use a mutex for accessing the socket in the interface code paths. Link: https://lore.kernel.org/r/20220907221700.10302-1-michael.christie@oracle.com Fixes: bcf3a2953d36 ("scsi: iscsi: iscsi_tcp: Avoid holding spinlock while calling getpeername()") Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/iscsi_tcp.c | 73 ++++++++++++++++++++++++++++------------ drivers/scsi/iscsi_tcp.h | 2 ++ 2 files changed, 54 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index df47557a02a3..6485c1aa9e74 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -558,6 +558,8 @@ iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session, tcp_conn = conn->dd_data; tcp_sw_conn = tcp_conn->dd_data; + mutex_init(&tcp_sw_conn->sock_lock); + tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto free_conn; @@ -592,11 +594,15 @@ free_conn: static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn) { - struct iscsi_session *session = conn->session; struct iscsi_tcp_conn *tcp_conn = conn->dd_data; struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; struct socket *sock = tcp_sw_conn->sock; + /* + * The iscsi transport class will make sure we are not called in + * parallel with start, stop, bind and destroys. However, this can be + * called twice if userspace does a stop then a destroy. 
+ */ if (!sock) return; @@ -604,9 +610,9 @@ static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn) iscsi_sw_tcp_conn_restore_callbacks(conn); sock_put(sock->sk); - spin_lock_bh(&session->frwd_lock); + mutex_lock(&tcp_sw_conn->sock_lock); tcp_sw_conn->sock = NULL; - spin_unlock_bh(&session->frwd_lock); + mutex_unlock(&tcp_sw_conn->sock_lock); sockfd_put(sock); } @@ -658,7 +664,6 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session, struct iscsi_cls_conn *cls_conn, uint64_t transport_eph, int is_leading) { - struct iscsi_session *session = cls_session->dd_data; struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_tcp_conn *tcp_conn = conn->dd_data; struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; @@ -678,10 +683,10 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session, if (err) goto free_socket; - spin_lock_bh(&session->frwd_lock); + mutex_lock(&tcp_sw_conn->sock_lock); /* bind iSCSI connection and socket */ tcp_sw_conn->sock = sock; - spin_unlock_bh(&session->frwd_lock); + mutex_unlock(&tcp_sw_conn->sock_lock); /* setup Socket parameters */ sk = sock->sk; @@ -717,8 +722,15 @@ static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn, break; case ISCSI_PARAM_DATADGST_EN: iscsi_set_param(cls_conn, param, buf, buflen); + + mutex_lock(&tcp_sw_conn->sock_lock); + if (!tcp_sw_conn->sock) { + mutex_unlock(&tcp_sw_conn->sock_lock); + return -ENOTCONN; + } tcp_sw_conn->sendpage = conn->datadgst_en ? 
sock_no_sendpage : tcp_sw_conn->sock->ops->sendpage; + mutex_unlock(&tcp_sw_conn->sock_lock); break; case ISCSI_PARAM_MAX_R2T: return iscsi_tcp_set_max_r2t(conn, buf); @@ -733,8 +745,8 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param, char *buf) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_tcp_conn *tcp_conn = conn->dd_data; - struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; + struct iscsi_sw_tcp_conn *tcp_sw_conn; + struct iscsi_tcp_conn *tcp_conn; struct sockaddr_in6 addr; struct socket *sock; int rc; @@ -744,21 +756,36 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn, case ISCSI_PARAM_CONN_ADDRESS: case ISCSI_PARAM_LOCAL_PORT: spin_lock_bh(&conn->session->frwd_lock); - if (!tcp_sw_conn || !tcp_sw_conn->sock) { + if (!conn->session->leadconn) { spin_unlock_bh(&conn->session->frwd_lock); return -ENOTCONN; } - sock = tcp_sw_conn->sock; - sock_hold(sock->sk); + /* + * The conn has been setup and bound, so just grab a ref + * incase a destroy runs while we are in the net layer. 
+ */ + iscsi_get_conn(conn->cls_conn); spin_unlock_bh(&conn->session->frwd_lock); + tcp_conn = conn->dd_data; + tcp_sw_conn = tcp_conn->dd_data; + + mutex_lock(&tcp_sw_conn->sock_lock); + sock = tcp_sw_conn->sock; + if (!sock) { + rc = -ENOTCONN; + goto sock_unlock; + } + if (param == ISCSI_PARAM_LOCAL_PORT) rc = kernel_getsockname(sock, (struct sockaddr *)&addr); else rc = kernel_getpeername(sock, (struct sockaddr *)&addr); - sock_put(sock->sk); +sock_unlock: + mutex_unlock(&tcp_sw_conn->sock_lock); + iscsi_put_conn(conn->cls_conn); if (rc < 0) return rc; @@ -796,17 +823,21 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, } tcp_conn = conn->dd_data; tcp_sw_conn = tcp_conn->dd_data; - sock = tcp_sw_conn->sock; - if (!sock) { - spin_unlock_bh(&session->frwd_lock); - return -ENOTCONN; - } - sock_hold(sock->sk); + /* + * The conn has been setup and bound, so just grab a ref + * incase a destroy runs while we are in the net layer. + */ + iscsi_get_conn(conn->cls_conn); spin_unlock_bh(&session->frwd_lock); - rc = kernel_getsockname(sock, - (struct sockaddr *)&addr); - sock_put(sock->sk); + mutex_lock(&tcp_sw_conn->sock_lock); + sock = tcp_sw_conn->sock; + if (!sock) + rc = -ENOTCONN; + else + rc = kernel_getsockname(sock, (struct sockaddr *)&addr); + mutex_unlock(&tcp_sw_conn->sock_lock); + iscsi_put_conn(conn->cls_conn); if (rc < 0) return rc; diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h index 791453195099..1731956326e2 100644 --- a/drivers/scsi/iscsi_tcp.h +++ b/drivers/scsi/iscsi_tcp.h @@ -28,6 +28,8 @@ struct iscsi_sw_tcp_send { struct iscsi_sw_tcp_conn { struct socket *sock; + /* Taken when accessing the sock from the netlink/sysfs interface */ + struct mutex sock_lock; struct iscsi_sw_tcp_send out; /* old values for socket callbacks */ From ed8e6011b9532ccaab6c434dcc08894e6b933706 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Fri, 19 Aug 2022 00:06:22 +0200 Subject: [PATCH 0244/1477] clk: qcom: apss-ipq6018: mark 
apcs_alias0_core_clk as critical [ Upstream commit 86e78995c93ee182433f965babfccd48417d4dcf ] While fixing up the driver I noticed that my IPQ8074 board was hanging after CPUFreq switched the frequency during boot, WDT would eventually reset it. So mark apcs_alias0_core_clk as critical since its the clock feeding the CPU cluster and must never be disabled. Fixes: 5e77b4ef1b19 ("clk: qcom: Add ipq6018 apss clock controller") Signed-off-by: Robert Marko Reviewed-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220818220628.339366-3-robimarko@gmail.com Signed-off-by: Sasha Levin --- drivers/clk/qcom/apss-ipq6018.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/apss-ipq6018.c b/drivers/clk/qcom/apss-ipq6018.c index d78ff2f310bf..b5d93657e1ee 100644 --- a/drivers/clk/qcom/apss-ipq6018.c +++ b/drivers/clk/qcom/apss-ipq6018.c @@ -57,7 +57,7 @@ static struct clk_branch apcs_alias0_core_clk = { .parent_hws = (const struct clk_hw *[]){ &apcs_alias0_clk_src.clkr.hw }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, + .flags = CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, .ops = &clk_branch2_ops, }, }, From 382a5fc49e6eee5b05b4e91744b7a5fd9f9bcbbb Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 11 Jan 2022 15:34:11 +0800 Subject: [PATCH 0245/1477] fsi: core: Check error number after calling ida_simple_get [ Upstream commit 35af9fb49bc5c6d61ef70b501c3a56fe161cce3e ] If allocation fails, the ida_simple_get() will return error number. So master->idx could be error number and be used in dev_set_name(). Therefore, it should be better to check it and return error if fails, like the ida_simple_get() in __fsi_get_new_minor(). 
Fixes: 09aecfab93b8 ("drivers/fsi: Add fsi master definition") Signed-off-by: Jiasheng Jiang Reviewed-by: Eddie James Link: https://lore.kernel.org/r/20220111073411.614138-1-jiasheng@iscas.ac.cn Signed-off-by: Joel Stanley Signed-off-by: Sasha Levin --- drivers/fsi/fsi-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c index 59ddc9fd5bca..92e6eebd1851 100644 --- a/drivers/fsi/fsi-core.c +++ b/drivers/fsi/fsi-core.c @@ -1309,6 +1309,9 @@ int fsi_master_register(struct fsi_master *master) mutex_init(&master->scan_lock); master->idx = ida_simple_get(&master_ida, 0, INT_MAX, GFP_KERNEL); + if (master->idx < 0) + return master->idx; + dev_set_name(&master->dev, "fsi%d", master->idx); master->dev.class = &fsi_master_class; From 28868b940b53c63f62c2cf8fbdc1ea539448c105 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 1 Aug 2022 14:42:02 +0300 Subject: [PATCH 0246/1477] mfd: intel_soc_pmic: Fix an error handling path in intel_soc_pmic_i2c_probe() [ Upstream commit 48749cabba109397b4e7dd556e85718ec0ec114d ] The commit in Fixes: has added a pwm_add_table() call in the probe() and a pwm_remove_table() call in the remove(), but forget to update the error handling path of the probe. Add the missing pwm_remove_table() call. 
Fixes: a3aa9a93df9f ("mfd: intel_soc_pmic_core: ADD PWM lookup table for CRC PMIC based PWM") Signed-off-by: Christophe JAILLET Signed-off-by: Andy Shevchenko Reviewed-by: Hans de Goede Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220801114211.36267-1-andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin --- drivers/mfd/intel_soc_pmic_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/intel_soc_pmic_core.c b/drivers/mfd/intel_soc_pmic_core.c index ddd64f9e3341..926653e1f603 100644 --- a/drivers/mfd/intel_soc_pmic_core.c +++ b/drivers/mfd/intel_soc_pmic_core.c @@ -95,6 +95,7 @@ static int intel_soc_pmic_i2c_probe(struct i2c_client *i2c, return 0; err_del_irq_chip: + pwm_remove_table(crc_pwm_lookup, ARRAY_SIZE(crc_pwm_lookup)); regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data); return ret; } From 08d40518033dc3663b1bd334a7481cd6b116f49b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 31 Jul 2022 14:06:23 +0200 Subject: [PATCH 0247/1477] mfd: fsl-imx25: Fix an error handling path in mx25_tsadc_setup_irq() [ Upstream commit 3fa9e4cfb55da512ebfd57336fde468830719298 ] If devm_of_platform_populate() fails, some resources need to be released. Introduce a mx25_tsadc_unset_irq() function that undoes mx25_tsadc_setup_irq() and call it both from the new error handling path of the probe and in the remove function. 
Fixes: a55196eff6d6 ("mfd: fsl-imx25: Use devm_of_platform_populate()") Signed-off-by: Christophe JAILLET Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/d404e04828fc06bcfddf81f9f3e9b4babbe35415.1659269156.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/mfd/fsl-imx25-tsadc.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/mfd/fsl-imx25-tsadc.c b/drivers/mfd/fsl-imx25-tsadc.c index a016b39fe9b0..95103b2cc471 100644 --- a/drivers/mfd/fsl-imx25-tsadc.c +++ b/drivers/mfd/fsl-imx25-tsadc.c @@ -84,6 +84,19 @@ static int mx25_tsadc_setup_irq(struct platform_device *pdev, return 0; } +static int mx25_tsadc_unset_irq(struct platform_device *pdev) +{ + struct mx25_tsadc *tsadc = platform_get_drvdata(pdev); + int irq = platform_get_irq(pdev, 0); + + if (irq) { + irq_set_chained_handler_and_data(irq, NULL, NULL); + irq_domain_remove(tsadc->domain); + } + + return 0; +} + static void mx25_tsadc_setup_clk(struct platform_device *pdev, struct mx25_tsadc *tsadc) { @@ -171,18 +184,21 @@ static int mx25_tsadc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, tsadc); - return devm_of_platform_populate(dev); + ret = devm_of_platform_populate(dev); + if (ret) + goto err_irq; + + return 0; + +err_irq: + mx25_tsadc_unset_irq(pdev); + + return ret; } static int mx25_tsadc_remove(struct platform_device *pdev) { - struct mx25_tsadc *tsadc = platform_get_drvdata(pdev); - int irq = platform_get_irq(pdev, 0); - - if (irq) { - irq_set_chained_handler_and_data(irq, NULL, NULL); - irq_domain_remove(tsadc->domain); - } + mx25_tsadc_unset_irq(pdev); return 0; } From f7b4388636227f543a88d7389017adb4f4ef5b65 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 31 Jul 2022 11:55:27 +0200 Subject: [PATCH 0248/1477] mfd: lp8788: Fix an error handling path in lp8788_probe() [ Upstream commit becfdcd75126b20b8ec10066c5e85b34f8994ad5 ] Should an error occur in mfd_add_devices(), some
resources need to be released, as already done in the .remove() function. Add an error handling path and a lp8788_irq_exit() call to undo a previous lp8788_irq_init(). Fixes: eea6b7cc53aa ("mfd: Add lp8788 mfd driver") Signed-off-by: Christophe JAILLET Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/18398722da9df9490722d853e4797350189ae79b.1659261275.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/mfd/lp8788.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/lp8788.c b/drivers/mfd/lp8788.c index 768d556b3fe9..5c3d642c8e3a 100644 --- a/drivers/mfd/lp8788.c +++ b/drivers/mfd/lp8788.c @@ -195,8 +195,16 @@ static int lp8788_probe(struct i2c_client *cl, const struct i2c_device_id *id) if (ret) return ret; - return mfd_add_devices(lp->dev, -1, lp8788_devs, - ARRAY_SIZE(lp8788_devs), NULL, 0, NULL); + ret = mfd_add_devices(lp->dev, -1, lp8788_devs, + ARRAY_SIZE(lp8788_devs), NULL, 0, NULL); + if (ret) + goto err_exit_irq; + + return 0; + +err_exit_irq: + lp8788_irq_exit(lp); + return ret; } static int lp8788_remove(struct i2c_client *cl) From b425e03c9639cdeaccf1165909cb56a1ddbf6e6d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 31 Jul 2022 11:55:38 +0200 Subject: [PATCH 0249/1477] mfd: lp8788: Fix an error handling path in lp8788_irq_init() and lp8788_irq_exit() [ Upstream commit 557244f6284f30613f2d61f14b579303165876c3 ] In lp8788_irq_init(), if an error occurs after a successful irq_domain_add_linear() call, it must be undone by a corresponding irq_domain_remove() call. irq_domain_remove() should also be called in lp8788_irq_exit() for the same reason.
Fixes: eea6b7cc53aa ("mfd: Add lp8788 mfd driver") Signed-off-by: Christophe JAILLET Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/bcd5a72c9c1c383dd6324680116426e32737655a.1659261275.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/mfd/lp8788-irq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mfd/lp8788-irq.c b/drivers/mfd/lp8788-irq.c index 348439a3fbbd..39006297f3d2 100644 --- a/drivers/mfd/lp8788-irq.c +++ b/drivers/mfd/lp8788-irq.c @@ -175,6 +175,7 @@ int lp8788_irq_init(struct lp8788 *lp, int irq) IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "lp8788-irq", irqd); if (ret) { + irq_domain_remove(lp->irqdm); dev_err(lp->dev, "failed to create a thread for IRQ_N\n"); return ret; } @@ -188,4 +189,6 @@ void lp8788_irq_exit(struct lp8788 *lp) { if (lp->irq) free_irq(lp->irq, lp->irqdm); + if (lp->irqdm) + irq_domain_remove(lp->irqdm); } From 3469dd8e22ffb0dc446dd0ec36eb5ef51cb5a3d3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 11 Aug 2022 13:53:05 +0300 Subject: [PATCH 0250/1477] mfd: fsl-imx25: Fix check for platform_get_irq() errors [ Upstream commit 75db7907355ca5e2ff606e9dd3e86b6c3a455fe2 ] The mx25_tsadc_remove() function assumes all non-zero returns are success but the platform_get_irq() function returns negative on error and positive non-zero values on success. It never returns zero, but if it did then treat that as a success. 
Fixes: 18f773937968 ("mfd: fsl-imx25: Clean up irq settings during removal") Signed-off-by: Dan Carpenter Reviewed-by: Martin Kaiser Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/YvTfkbVQWYKMKS/t@kili Signed-off-by: Sasha Levin --- drivers/mfd/fsl-imx25-tsadc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/fsl-imx25-tsadc.c b/drivers/mfd/fsl-imx25-tsadc.c index 95103b2cc471..5f1f6f3a0696 100644 --- a/drivers/mfd/fsl-imx25-tsadc.c +++ b/drivers/mfd/fsl-imx25-tsadc.c @@ -69,7 +69,7 @@ static int mx25_tsadc_setup_irq(struct platform_device *pdev, int irq; irq = platform_get_irq(pdev, 0); - if (irq <= 0) + if (irq < 0) return irq; tsadc->domain = irq_domain_add_simple(np, 2, 0, &mx25_tsadc_domain_ops, @@ -89,7 +89,7 @@ static int mx25_tsadc_unset_irq(struct platform_device *pdev) struct mx25_tsadc *tsadc = platform_get_drvdata(pdev); int irq = platform_get_irq(pdev, 0); - if (irq) { + if (irq >= 0) { irq_set_chained_handler_and_data(irq, NULL, NULL); irq_domain_remove(tsadc->domain); } From 6e58f2469ec5c3a4325bbeec0b0bd3e37d21e138 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 13 Sep 2022 17:11:12 +0800 Subject: [PATCH 0251/1477] mfd: sm501: Add check for platform_driver_register() [ Upstream commit 8325a6c24ad78b8c1acc3c42b098ee24105d68e5 ] As platform_driver_register() can return error numbers, it should be better to check platform_driver_register() and deal with the exception. 
Fixes: b6d6454fdb66 ("[PATCH] mfd: SM501 core driver") Signed-off-by: Jiasheng Jiang Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220913091112.1739138-1-jiasheng@iscas.ac.cn Signed-off-by: Sasha Levin --- drivers/mfd/sm501.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index 6d2f4a0a901d..37ad72d8cde2 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -1720,7 +1720,12 @@ static struct platform_driver sm501_plat_driver = { static int __init sm501_base_init(void) { - platform_driver_register(&sm501_plat_driver); + int ret; + + ret = platform_driver_register(&sm501_plat_driver); + if (ret < 0) + return ret; + return pci_register_driver(&sm501_pci_driver); } From 1dd5148445eb7a32a086e214f6f46ce9aa1c1ca7 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 27 Sep 2022 12:11:20 +0200 Subject: [PATCH 0252/1477] clk: mediatek: mt8183: mfgcfg: Propagate rate changes to parent [ Upstream commit 9f94f545f258b15bfa6357eb62e1e307b712851e ] The only clock in the MT8183 MFGCFG block feeds the GPU. Propagate its rate change requests to its parent, so that DVFS for the GPU can work properly. 
Fixes: acddfc2c261b ("clk: mediatek: Add MT8183 clock support") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220927101128.44758-3-angelogioacchino.delregno@collabora.com Signed-off-by: Chen-Yu Tsai Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mt8183-mfgcfg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c index 37b4162c5882..3a33014eee7f 100644 --- a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c +++ b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c @@ -18,9 +18,9 @@ static const struct mtk_gate_regs mfg_cg_regs = { .sta_ofs = 0x0, }; -#define GATE_MFG(_id, _name, _parent, _shift) \ - GATE_MTK(_id, _name, _parent, &mfg_cg_regs, _shift, \ - &mtk_clk_gate_ops_setclr) +#define GATE_MFG(_id, _name, _parent, _shift) \ + GATE_MTK_FLAGS(_id, _name, _parent, &mfg_cg_regs, _shift, \ + &mtk_clk_gate_ops_setclr, CLK_SET_RATE_PARENT) static const struct mtk_gate mfg_clks[] = { GATE_MFG(CLK_MFG_BG3D, "mfg_bg3d", "mfg_sel", 0) From a01c0c160049637f19390069bdba3e87a7050144 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 19 Sep 2022 09:58:42 -0700 Subject: [PATCH 0253/1477] dmaengine: ioat: stop mod_timer from resurrecting deleted timer in __cleanup() [ Upstream commit 898ec89dbb55b8294695ad71694a0684e62b2a73 ] User reports observing timer event report channel halted but no error observed in CHANERR register. The driver finished self-test and released channel resources. Debug shows that __cleanup() can call mod_timer() after the timer has been deleted and thus resurrect the timer. While harmless, it causes a spurious error message to be emitted. Use mod_timer_pending() call to prevent deleted timer from being resurrected.
Fixes: 3372de5813e4 ("dmaengine: ioatdma: removal of dma_v3.c and relevant ioat3 references") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/166360672197.3851724.17040290563764838369.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/ioat/dma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 37ff4ec7db76..e2070df6cad2 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -656,7 +656,7 @@ static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) if (active - i == 0) { dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n", __func__); - mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); } /* microsecond delay by sysfs variable per pending descriptor */ @@ -682,7 +682,7 @@ static void ioat_cleanup(struct ioatdma_chan *ioat_chan) if (chanerr & (IOAT_CHANERR_HANDLE_MASK | IOAT_CHANERR_RECOVER_MASK)) { - mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); ioat_eh(ioat_chan); } } @@ -879,7 +879,7 @@ static void check_active(struct ioatdma_chan *ioat_chan) } if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) - mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); } static void ioat_reboot_chan(struct ioatdma_chan *ioat_chan) From 92f52770a7af6988b3c2bbb9b76a2963e3d950a8 Mon Sep 17 00:00:00 2001 From: David Collins Date: Thu, 29 Sep 2022 17:50:16 -0700 Subject: [PATCH 0254/1477] spmi: pmic-arb: correct duplicate APID to PPID mapping logic [ Upstream commit 1f1693118c2476cb1666ad357edcf3cf48bf9b16 ] Correct the way that duplicate PPID mappings are handled for PMIC arbiter v5. 
The final APID mapped to a given PPID should be the one which has write owner = APPS EE, if it exists, or if not that, then the first APID mapped to the PPID, if it exists. Fixes: 40f318f0ed67 ("spmi: pmic-arb: add support for HW version 5") Signed-off-by: David Collins Signed-off-by: Fenglin Wu Link: https://lore.kernel.org/r/1655004286-11493-7-git-send-email-quic_fenglinw@quicinc.com Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20220930005019.2663064-8-sboyd@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/spmi/spmi-pmic-arb.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index bbbd311eda03..e6de2aeece8d 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -887,7 +887,8 @@ static int pmic_arb_read_apid_map_v5(struct spmi_pmic_arb *pmic_arb) * version 5, there is more than one APID mapped to each PPID. * The owner field for each of these mappings specifies the EE which is * allowed to write to the APID. The owner of the last (highest) APID - * for a given PPID will receive interrupts from the PPID. + * which has the IRQ owner bit set for a given PPID will receive + * interrupts from the PPID. 
*/ for (i = 0; ; i++, apidd++) { offset = pmic_arb->ver_ops->apid_map_offset(i); @@ -910,16 +911,16 @@ static int pmic_arb_read_apid_map_v5(struct spmi_pmic_arb *pmic_arb) apid = pmic_arb->ppid_to_apid[ppid] & ~PMIC_ARB_APID_VALID; prev_apidd = &pmic_arb->apid_data[apid]; - if (valid && is_irq_ee && - prev_apidd->write_ee == pmic_arb->ee) { + if (!valid || apidd->write_ee == pmic_arb->ee) { + /* First PPID mapping or one for this EE */ + pmic_arb->ppid_to_apid[ppid] = i | PMIC_ARB_APID_VALID; + } else if (valid && is_irq_ee && + prev_apidd->write_ee == pmic_arb->ee) { /* * Duplicate PPID mapping after the one for this EE; * override the irq owner */ prev_apidd->irq_ee = apidd->irq_ee; - } else if (!valid || is_irq_ee) { - /* First PPID mapping or duplicate for another EE */ - pmic_arb->ppid_to_apid[ppid] = i | PMIC_ARB_APID_VALID; } apidd->ppid = ppid; From 2f19a1050e1b2e63c326550a92f884f79c2f089f Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 30 Sep 2022 01:53:55 +0300 Subject: [PATCH 0255/1477] clk: vc5: Fix 5P49V6901 outputs disabling when enabling FOD [ Upstream commit c388cc804016cf0f65afdc2362b120aa594ff3e6 ] We have discovered random glitches during the system boot up procedure. The problem investigation led us to the weird outcomes: when none of the Renesas 5P49V6901 ports are explicitly enabled by the kernel driver, the glitches disappeared. It was a mystery since the SoC external clock domains were fed with different 5P49V6901 outputs. The driver code didn't seem like bogus either. We almost despaired to find out a root cause when the solution has been found for a more modern revision of the chip. It turned out the 5P49V6901 clock generator stopped its output for a short period of time during the VC5_OUT_DIV_CONTROL register writing. 
The same problem was found for the 5P49V6965 revision of the chip and was successfully fixed in commit fc336ae622df ("clk: vc5: fix output disabling when enabling a FOD") by enabling the "bypass_sync" flag hidden inside "Unused Factory Reserved Register". Even though the 5P49V6901 registers description and programming guide doesn't provide any intel regarding that flag, setting it up anyway in the officially unused register completely eliminated the denoted glitches. Thus let's activate the functionality submitted in commit fc336ae622df ("clk: vc5: fix output disabling when enabling a FOD") for the Renesas 5P49V6901 chip too in order to remove the ports implicit inter-dependency. Fixes: dbf6b16f5683 ("clk: vc5: Add support for IDT VersaClock 5P49V6901") Signed-off-by: Serge Semin Reviewed-by: Luca Ceresoli Link: https://lore.kernel.org/r/20220929225402.9696-2-Sergey.Semin@baikalelectronics.ru Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-versaclock5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c index 4e741f94baf0..eb597ea7bb87 100644 --- a/drivers/clk/clk-versaclock5.c +++ b/drivers/clk/clk-versaclock5.c @@ -1116,7 +1116,7 @@ static const struct vc5_chip_info idt_5p49v6901_info = { .model = IDT_VC6_5P49V6901, .clk_fod_cnt = 4, .clk_out_cnt = 5, - .flags = VC5_HAS_PFD_FREQ_DBL, + .flags = VC5_HAS_PFD_FREQ_DBL | VC5_HAS_BYPASS_SYNC_BIT, }; static const struct vc5_chip_info idt_5p49v6965_info = { From 823fd523912ff4fc8edf2a2ac93f57329c90f36e Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 30 Sep 2022 01:53:56 +0300 Subject: [PATCH 0256/1477] clk: baikal-t1: Fix invalid xGMAC PTP clock divider [ Upstream commit 3c742088686ce922704aec5b11d09bcc5a396589 ] Most likely due to copy-paste mistake the divider has been set to 10 while according to the SoC reference manual it's supposed to be 8 thus having PTP clock frequency of 156.25 MHz. 
Fixes: 353afa3a8d2e ("clk: Add Baikal-T1 CCU Dividers driver") Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20220929225402.9696-3-Sergey.Semin@baikalelectronics.ru Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/baikal-t1/clk-ccu-div.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/baikal-t1/clk-ccu-div.c b/drivers/clk/baikal-t1/clk-ccu-div.c index f141fda12b09..ea77eec40ddd 100644 --- a/drivers/clk/baikal-t1/clk-ccu-div.c +++ b/drivers/clk/baikal-t1/clk-ccu-div.c @@ -207,7 +207,7 @@ static const struct ccu_div_info sys_info[] = { CCU_DIV_GATE_INFO(CCU_SYS_XGMAC_REF_CLK, "sys_xgmac_ref_clk", "eth_clk", CCU_SYS_XGMAC_BASE, 8), CCU_DIV_FIXED_INFO(CCU_SYS_XGMAC_PTP_CLK, "sys_xgmac_ptp_clk", - "eth_clk", 10), + "eth_clk", 8), CCU_DIV_GATE_INFO(CCU_SYS_USB_CLK, "sys_usb_clk", "eth_clk", CCU_SYS_USB_BASE, 10), CCU_DIV_VAR_INFO(CCU_SYS_PVT_CLK, "sys_pvt_clk", From 5f143f3bc2e0a501e945e4bcda882f2eff90eaac Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 30 Sep 2022 01:53:57 +0300 Subject: [PATCH 0257/1477] clk: baikal-t1: Add shared xGMAC ref/ptp clocks internal parent [ Upstream commit e2eef312762e0b5a5a70d29fe59a245c0a3cffa0 ] Baikal-T1 CCU reference manual says that both xGMAC reference and xGMAC PTP clocks are generated by two different wrappers with the same constant divider thus each producing a 156.25 MHz signal. But for some reason both of these clock sources are gated by a single switch-flag in the CCU registers space - CCU_SYS_XGMAC_BASE.BIT(0). In order to make the clocks handled independently we need to define a shared parental gate so the base clock signal would be switched off only if both of the child-clocks are disabled. Note the ID is intentionally set to -2 since we are going to add a one more internal clock identifier in the next commit. 
Fixes: 353afa3a8d2e ("clk: Add Baikal-T1 CCU Dividers driver") Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20220929225402.9696-4-Sergey.Semin@baikalelectronics.ru Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/baikal-t1/ccu-div.c | 1 + drivers/clk/baikal-t1/ccu-div.h | 6 ++++++ drivers/clk/baikal-t1/clk-ccu-div.c | 8 +++++--- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/clk/baikal-t1/ccu-div.c b/drivers/clk/baikal-t1/ccu-div.c index 4062092d67f9..bbfa3526ee10 100644 --- a/drivers/clk/baikal-t1/ccu-div.c +++ b/drivers/clk/baikal-t1/ccu-div.c @@ -579,6 +579,7 @@ struct ccu_div *ccu_div_hw_register(const struct ccu_div_init_data *div_init) goto err_free_div; } parent_data.fw_name = div_init->parent_name; + parent_data.name = div_init->parent_name; hw_init.parent_data = &parent_data; hw_init.num_parents = 1; diff --git a/drivers/clk/baikal-t1/ccu-div.h b/drivers/clk/baikal-t1/ccu-div.h index 795665caefbd..b6a9c8e45318 100644 --- a/drivers/clk/baikal-t1/ccu-div.h +++ b/drivers/clk/baikal-t1/ccu-div.h @@ -13,6 +13,12 @@ #include #include +/* + * CCU Divider private clock IDs + * @CCU_SYS_XGMAC_CLK: CCU XGMAC internal clock + */ +#define CCU_SYS_XGMAC_CLK -2 + /* * CCU Divider private flags * @CCU_DIV_SKIP_ONE: Due to some reason divider can't be set to 1. 
diff --git a/drivers/clk/baikal-t1/clk-ccu-div.c b/drivers/clk/baikal-t1/clk-ccu-div.c index ea77eec40ddd..3953ae5664be 100644 --- a/drivers/clk/baikal-t1/clk-ccu-div.c +++ b/drivers/clk/baikal-t1/clk-ccu-div.c @@ -204,10 +204,12 @@ static const struct ccu_div_info sys_info[] = { "eth_clk", CCU_SYS_GMAC1_BASE, 5), CCU_DIV_FIXED_INFO(CCU_SYS_GMAC1_PTP_CLK, "sys_gmac1_ptp_clk", "eth_clk", 10), - CCU_DIV_GATE_INFO(CCU_SYS_XGMAC_REF_CLK, "sys_xgmac_ref_clk", - "eth_clk", CCU_SYS_XGMAC_BASE, 8), + CCU_DIV_GATE_INFO(CCU_SYS_XGMAC_CLK, "sys_xgmac_clk", + "eth_clk", CCU_SYS_XGMAC_BASE, 1), + CCU_DIV_FIXED_INFO(CCU_SYS_XGMAC_REF_CLK, "sys_xgmac_ref_clk", + "sys_xgmac_clk", 8), CCU_DIV_FIXED_INFO(CCU_SYS_XGMAC_PTP_CLK, "sys_xgmac_ptp_clk", - "eth_clk", 8), + "sys_xgmac_clk", 8), CCU_DIV_GATE_INFO(CCU_SYS_USB_CLK, "sys_usb_clk", "eth_clk", CCU_SYS_USB_BASE, 10), CCU_DIV_VAR_INFO(CCU_SYS_PVT_CLK, "sys_pvt_clk", From 9a6087a438ef56e4c6fd904664bd1fe88cf5e0d6 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 30 Sep 2022 01:53:58 +0300 Subject: [PATCH 0258/1477] clk: baikal-t1: Add SATA internal ref clock buffer [ Upstream commit 081a9b7c74eae4e12b2cb1b86720f836a8f29247 ] It turns out the internal SATA reference clock signal will stay unavailable for the SATA interface consumer until the buffer on its way is ungated. So aside with having the actual clock divider enabled we need to ungate a buffer placed on the signal way to the SATA controller (most likely some rudiment from the initial SoC release). Seeing the switch flag is placed in the same register as the SATA-ref clock divider at a non-standard offset, let's implement it as a separate clock controller with the set-rate propagation to the parental clock divider wrapper. As such we'll be able to disable/enable and still change the original clock source rate.
Fixes: 353afa3a8d2e ("clk: Add Baikal-T1 CCU Dividers driver") Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20220929225402.9696-5-Sergey.Semin@baikalelectronics.ru Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/baikal-t1/ccu-div.c | 64 +++++++++++++++++++++++++++++ drivers/clk/baikal-t1/ccu-div.h | 4 ++ drivers/clk/baikal-t1/clk-ccu-div.c | 18 +++++++- 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/drivers/clk/baikal-t1/ccu-div.c b/drivers/clk/baikal-t1/ccu-div.c index bbfa3526ee10..a6642f3d33d4 100644 --- a/drivers/clk/baikal-t1/ccu-div.c +++ b/drivers/clk/baikal-t1/ccu-div.c @@ -34,6 +34,7 @@ #define CCU_DIV_CTL_CLKDIV_MASK(_width) \ GENMASK((_width) + CCU_DIV_CTL_CLKDIV_FLD - 1, CCU_DIV_CTL_CLKDIV_FLD) #define CCU_DIV_CTL_LOCK_SHIFTED BIT(27) +#define CCU_DIV_CTL_GATE_REF_BUF BIT(28) #define CCU_DIV_CTL_LOCK_NORMAL BIT(31) #define CCU_DIV_RST_DELAY_US 1 @@ -170,6 +171,40 @@ static int ccu_div_gate_is_enabled(struct clk_hw *hw) return !!(val & CCU_DIV_CTL_EN); } +static int ccu_div_buf_enable(struct clk_hw *hw) +{ + struct ccu_div *div = to_ccu_div(hw); + unsigned long flags; + + spin_lock_irqsave(&div->lock, flags); + regmap_update_bits(div->sys_regs, div->reg_ctl, + CCU_DIV_CTL_GATE_REF_BUF, 0); + spin_unlock_irqrestore(&div->lock, flags); + + return 0; +} + +static void ccu_div_buf_disable(struct clk_hw *hw) +{ + struct ccu_div *div = to_ccu_div(hw); + unsigned long flags; + + spin_lock_irqsave(&div->lock, flags); + regmap_update_bits(div->sys_regs, div->reg_ctl, + CCU_DIV_CTL_GATE_REF_BUF, CCU_DIV_CTL_GATE_REF_BUF); + spin_unlock_irqrestore(&div->lock, flags); +} + +static int ccu_div_buf_is_enabled(struct clk_hw *hw) +{ + struct ccu_div *div = to_ccu_div(hw); + u32 val = 0; + + regmap_read(div->sys_regs, div->reg_ctl, &val); + + return !(val & CCU_DIV_CTL_GATE_REF_BUF); +} + static unsigned long ccu_div_var_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { @@ -323,6 +358,7 @@ static const struct 
ccu_div_dbgfs_bit ccu_div_bits[] = { CCU_DIV_DBGFS_BIT_ATTR("div_en", CCU_DIV_CTL_EN), CCU_DIV_DBGFS_BIT_ATTR("div_rst", CCU_DIV_CTL_RST), CCU_DIV_DBGFS_BIT_ATTR("div_bypass", CCU_DIV_CTL_SET_CLKDIV), + CCU_DIV_DBGFS_BIT_ATTR("div_buf", CCU_DIV_CTL_GATE_REF_BUF), CCU_DIV_DBGFS_BIT_ATTR("div_lock", CCU_DIV_CTL_LOCK_NORMAL) }; @@ -441,6 +477,9 @@ static void ccu_div_var_debug_init(struct clk_hw *hw, struct dentry *dentry) continue; } + if (!strcmp("div_buf", name)) + continue; + bits[didx] = ccu_div_bits[bidx]; bits[didx].div = div; @@ -477,6 +516,21 @@ static void ccu_div_gate_debug_init(struct clk_hw *hw, struct dentry *dentry) &ccu_div_dbgfs_fixed_clkdiv_fops); } +static void ccu_div_buf_debug_init(struct clk_hw *hw, struct dentry *dentry) +{ + struct ccu_div *div = to_ccu_div(hw); + struct ccu_div_dbgfs_bit *bit; + + bit = kmalloc(sizeof(*bit), GFP_KERNEL); + if (!bit) + return; + + *bit = ccu_div_bits[3]; + bit->div = div; + debugfs_create_file_unsafe(bit->name, ccu_div_dbgfs_mode, dentry, bit, + &ccu_div_dbgfs_bit_fops); +} + static void ccu_div_fixed_debug_init(struct clk_hw *hw, struct dentry *dentry) { struct ccu_div *div = to_ccu_div(hw); @@ -489,6 +543,7 @@ static void ccu_div_fixed_debug_init(struct clk_hw *hw, struct dentry *dentry) #define ccu_div_var_debug_init NULL #define ccu_div_gate_debug_init NULL +#define ccu_div_buf_debug_init NULL #define ccu_div_fixed_debug_init NULL #endif /* !CONFIG_DEBUG_FS */ @@ -520,6 +575,13 @@ static const struct clk_ops ccu_div_gate_ops = { .debug_init = ccu_div_gate_debug_init }; +static const struct clk_ops ccu_div_buf_ops = { + .enable = ccu_div_buf_enable, + .disable = ccu_div_buf_disable, + .is_enabled = ccu_div_buf_is_enabled, + .debug_init = ccu_div_buf_debug_init +}; + static const struct clk_ops ccu_div_fixed_ops = { .recalc_rate = ccu_div_fixed_recalc_rate, .round_rate = ccu_div_fixed_round_rate, @@ -566,6 +628,8 @@ struct ccu_div *ccu_div_hw_register(const struct ccu_div_init_data *div_init) } else if 
(div_init->type == CCU_DIV_GATE) { hw_init.ops = &ccu_div_gate_ops; div->divider = div_init->divider; + } else if (div_init->type == CCU_DIV_BUF) { + hw_init.ops = &ccu_div_buf_ops; } else if (div_init->type == CCU_DIV_FIXED) { hw_init.ops = &ccu_div_fixed_ops; div->divider = div_init->divider; diff --git a/drivers/clk/baikal-t1/ccu-div.h b/drivers/clk/baikal-t1/ccu-div.h index b6a9c8e45318..4eb49ff4803c 100644 --- a/drivers/clk/baikal-t1/ccu-div.h +++ b/drivers/clk/baikal-t1/ccu-div.h @@ -15,8 +15,10 @@ /* * CCU Divider private clock IDs + * @CCU_SYS_SATA_CLK: CCU SATA internal clock * @CCU_SYS_XGMAC_CLK: CCU XGMAC internal clock */ +#define CCU_SYS_SATA_CLK -1 #define CCU_SYS_XGMAC_CLK -2 /* @@ -37,11 +39,13 @@ * enum ccu_div_type - CCU Divider types * @CCU_DIV_VAR: Clocks gate with variable divider. * @CCU_DIV_GATE: Clocks gate with fixed divider. + * @CCU_DIV_BUF: Clock gate with no divider. * @CCU_DIV_FIXED: Ungateable clock with fixed divider. */ enum ccu_div_type { CCU_DIV_VAR, CCU_DIV_GATE, + CCU_DIV_BUF, CCU_DIV_FIXED }; diff --git a/drivers/clk/baikal-t1/clk-ccu-div.c b/drivers/clk/baikal-t1/clk-ccu-div.c index 3953ae5664be..90f4fda406ee 100644 --- a/drivers/clk/baikal-t1/clk-ccu-div.c +++ b/drivers/clk/baikal-t1/clk-ccu-div.c @@ -76,6 +76,16 @@ .divider = _divider \ } +#define CCU_DIV_BUF_INFO(_id, _name, _pname, _base, _flags) \ + { \ + .id = _id, \ + .name = _name, \ + .parent_name = _pname, \ + .base = _base, \ + .type = CCU_DIV_BUF, \ + .flags = _flags \ + } + #define CCU_DIV_FIXED_INFO(_id, _name, _pname, _divider) \ { \ .id = _id, \ @@ -188,11 +198,14 @@ static const struct ccu_div_rst_map axi_rst_map[] = { * for the SoC devices registers IO-operations. 
*/ static const struct ccu_div_info sys_info[] = { - CCU_DIV_VAR_INFO(CCU_SYS_SATA_REF_CLK, "sys_sata_ref_clk", + CCU_DIV_VAR_INFO(CCU_SYS_SATA_CLK, "sys_sata_clk", "sata_clk", CCU_SYS_SATA_REF_BASE, 4, CLK_SET_RATE_GATE, CCU_DIV_SKIP_ONE | CCU_DIV_LOCK_SHIFTED | CCU_DIV_RESET_DOMAIN), + CCU_DIV_BUF_INFO(CCU_SYS_SATA_REF_CLK, "sys_sata_ref_clk", + "sys_sata_clk", CCU_SYS_SATA_REF_BASE, + CLK_SET_RATE_PARENT), CCU_DIV_VAR_INFO(CCU_SYS_APB_CLK, "sys_apb_clk", "pcie_clk", CCU_SYS_APB_BASE, 5, CLK_IS_CRITICAL, CCU_DIV_RESET_DOMAIN), @@ -398,6 +411,9 @@ static int ccu_div_clk_register(struct ccu_div_data *data) init.base = info->base; init.sys_regs = data->sys_regs; init.divider = info->divider; + } else if (init.type == CCU_DIV_BUF) { + init.base = info->base; + init.sys_regs = data->sys_regs; } else { init.divider = info->divider; } From 9b65fd651334675d17bf681c51602b3739437315 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 4 Sep 2022 16:10:37 +0200 Subject: [PATCH 0259/1477] clk: bcm2835: fix bcm2835_clock_rate_from_divisor declaration [ Upstream commit 0b919a3728691c172312dee99ba654055ccd8c84 ] The return value of bcm2835_clock_rate_from_divisor is always unsigned and also all caller expect this. So fix the declaration accordingly. Fixes: 41691b8862e2 ("clk: bcm2835: Add support for programming the audio domain clocks") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20220904141037.38816-1-stefan.wahren@i2se.com Reviewed-by: Ivan T. 
Ivanov Reviewed-by: Florian Fainelli Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/bcm/clk-bcm2835.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 178886823b90..f306b959297d 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -968,9 +968,9 @@ static u32 bcm2835_clock_choose_div(struct clk_hw *hw, return div; } -static long bcm2835_clock_rate_from_divisor(struct bcm2835_clock *clock, - unsigned long parent_rate, - u32 div) +static unsigned long bcm2835_clock_rate_from_divisor(struct bcm2835_clock *clock, + unsigned long parent_rate, + u32 div) { const struct bcm2835_clock_data *data = clock->data; u64 temp; From 6d01017247eee3fba399f601b0bcb38e4fb88a72 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 2 Jun 2022 07:08:36 +0400 Subject: [PATCH 0260/1477] clk: ti: dra7-atl: Fix reference leak in of_dra7_atl_clk_probe [ Upstream commit 9c59a01caba26ec06fefd6ca1f22d5fd1de57d63 ] pm_runtime_get_sync() will increment the PM usage counter. Forgetting the matching put operation will result in a reference leak. Add the missing pm_runtime_put_sync() calls in some error paths.
Fixes: 9ac33b0ce81f ("CLK: TI: Driver for DRA7 ATL (Audio Tracking Logic)") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220602030838.52057-1-linmq006@gmail.com Reviewed-by: Tony Lindgren Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/ti/clk-dra7-atl.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c index 8d4c08b034bd..e2e59d78c173 100644 --- a/drivers/clk/ti/clk-dra7-atl.c +++ b/drivers/clk/ti/clk-dra7-atl.c @@ -251,14 +251,16 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev) if (rc) { pr_err("%s: failed to lookup atl clock %d\n", __func__, i); - return -EINVAL; + ret = -EINVAL; + goto pm_put; } clk = of_clk_get_from_provider(&clkspec); if (IS_ERR(clk)) { pr_err("%s: failed to get atl clock %d from provider\n", __func__, i); - return PTR_ERR(clk); + ret = PTR_ERR(clk); + goto pm_put; } cdesc = to_atl_desc(__clk_get_hw(clk)); @@ -291,8 +293,9 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev) if (cdesc->enabled) atl_clk_enable(__clk_get_hw(clk)); } - pm_runtime_put_sync(cinfo->dev); +pm_put: + pm_runtime_put_sync(cinfo->dev); return ret; } From b1616599c99a090b50ef87b1465145343c3bfff3 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 21 Apr 2022 13:34:26 +0930 Subject: [PATCH 0261/1477] clk: ast2600: BCLK comes from EPLL [ Upstream commit b8c1dc9c00b252b3be853720a71b05ed451ddd9f ] This correction was made in the u-boot SDK recently. There are no in-tree users of this clock so the impact is minimal. 
Fixes: d3d04f6c330a ("clk: Add support for AST2600 SoC") Link: https://github.com/AspeedTech-BMC/u-boot/commit/8ad54a5ae15f27fea5e894cc2539a20d90019717 Signed-off-by: Joel Stanley Link: https://lore.kernel.org/r/20220421040426.171256-1-joel@jms.id.au Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-ast2600.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-ast2600.c b/drivers/clk/clk-ast2600.c index 24dab2312bc6..9c3305bcb27a 100644 --- a/drivers/clk/clk-ast2600.c +++ b/drivers/clk/clk-ast2600.c @@ -622,7 +622,7 @@ static int aspeed_g6_clk_probe(struct platform_device *pdev) regmap_write(map, 0x308, 0x12000); /* 3x3 = 9 */ /* P-Bus (BCLK) clock divider */ - hw = clk_hw_register_divider_table(dev, "bclk", "hpll", 0, + hw = clk_hw_register_divider_table(dev, "bclk", "epll", 0, scu_g6_base + ASPEED_G6_CLK_SELECTION1, 20, 3, 0, ast2600_div_table, &aspeed_g6_clk_lock); From 44c26ceffaa39029336f83010978f380767b2620 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 26 Aug 2022 12:13:35 +0200 Subject: [PATCH 0262/1477] mailbox: bcm-ferxrm-mailbox: Fix error check for dma_map_sg [ Upstream commit 6b207ce8a96a71e966831e3a13c38143ba9a73c1 ] dma_map_sg() returns 0 on error; fix the error check and return -EIO to the caller.
Fixes: dbc049eee730 ("mailbox: Add driver for Broadcom FlexRM ring manager") Signed-off-by: Jack Wang Signed-off-by: Jassi Brar Signed-off-by: Sasha Levin --- drivers/mailbox/bcm-flexrm-mailbox.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c index bee33abb5308..e913ed1e34c6 100644 --- a/drivers/mailbox/bcm-flexrm-mailbox.c +++ b/drivers/mailbox/bcm-flexrm-mailbox.c @@ -632,15 +632,15 @@ static int flexrm_spu_dma_map(struct device *dev, struct brcm_message *msg) rc = dma_map_sg(dev, msg->spu.src, sg_nents(msg->spu.src), DMA_TO_DEVICE); - if (rc < 0) - return rc; + if (!rc) + return -EIO; rc = dma_map_sg(dev, msg->spu.dst, sg_nents(msg->spu.dst), DMA_FROM_DEVICE); - if (rc < 0) { + if (!rc) { dma_unmap_sg(dev, msg->spu.src, sg_nents(msg->spu.src), DMA_TO_DEVICE); - return rc; + return -EIO; } return 0; From 592d283a656ddede253e360a5c2e8b4c3c96c4fe Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 2 Sep 2022 18:00:08 +0200 Subject: [PATCH 0263/1477] powerpc/math_emu/efp: Include module.h [ Upstream commit cfe0d370e0788625ce0df3239aad07a2506c1796 ] When building with a recent version of clang, there are a couple of errors around the call to module_init(): arch/powerpc/math-emu/math_efp.c:927:1: error: type specifier missing, defaults to 'int'; ISO C99 and later do not support implicit int [-Wimplicit-int] module_init(spe_mathemu_init); ^ int arch/powerpc/math-emu/math_efp.c:927:13: error: a parameter list without types is only allowed in a function definition module_init(spe_mathemu_init); ^ 2 errors generated. module_init() is a macro, which is not getting expanded because module.h is not included in this file. Add the include so that the macro can expand properly, clearing up the build failure. 
Fixes: ac6f120369ff ("powerpc/85xx: Workaroudn e500 CPU erratum A005") [chleroy: added fixes tag] Reported-by: kernel test robot Signed-off-by: Nathan Chancellor Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/8403854a4c187459b2f4da3537f51227b70b9223.1662134272.git.christophe.leroy@csgroup.eu Signed-off-by: Sasha Levin --- arch/powerpc/math-emu/math_efp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c index 0a05e51964c1..90111c9e7521 100644 --- a/arch/powerpc/math-emu/math_efp.c +++ b/arch/powerpc/math-emu/math_efp.c @@ -17,6 +17,7 @@ #include #include +#include #include #include From 718e2d802388749e35d6213651f19db1818dbf97 Mon Sep 17 00:00:00 2001 From: Liang He Date: Mon, 4 Jul 2022 22:52:33 +0800 Subject: [PATCH 0264/1477] powerpc/sysdev/fsl_msi: Add missing of_node_put() [ Upstream commit def435c04ee984a5f9ed2711b2bfe946936c6a21 ] In fsl_setup_msi_irqs(), use of_node_put() to drop the reference returned by of_parse_phandle(). 
Fixes: 895d603f945ba ("powerpc/fsl_msi: add support for the fsl, msi property in PCI nodes") Co-authored-by: Miaoqian Lin Signed-off-by: Liang He Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220704145233.278539-1-windhl@126.com Signed-off-by: Sasha Levin --- arch/powerpc/sysdev/fsl_msi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 808e7118abfc..d276c5e96445 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -211,8 +211,10 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) dev_err(&pdev->dev, "node %pOF has an invalid fsl,msi phandle %u\n", hose->dn, np->phandle); + of_node_put(np); return -EINVAL; } + of_node_put(np); } for_each_pci_msi_entry(entry, pdev) { From 434db6d17b6bb7638b90387b5606a4c67be12774 Mon Sep 17 00:00:00 2001 From: Liang He Date: Fri, 1 Jul 2022 21:17:50 +0800 Subject: [PATCH 0265/1477] powerpc/pci_dn: Add missing of_node_put() [ Upstream commit 110a1fcb6c4d55144d8179983a475f17a1d6f832 ] In pci_add_device_node_info(), use of_node_put() to drop the reference to 'parent' returned by of_get_parent() to keep refcount balance. Fixes: cca87d303c85 ("powerpc/pci: Refactor pci_dn") Co-authored-by: Miaoqian Lin Signed-off-by: Liang He Signed-off-by: Michael Ellerman Reviewed-by: Tyrel Datwyler Link: https://lore.kernel.org/r/20220701131750.240170-1-windhl@126.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/pci_dn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index e99b7c547d7e..b173ba342645 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -330,6 +330,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, INIT_LIST_HEAD(&pdn->list); parent = of_get_parent(dn); pdn->parent = parent ? 
PCI_DN(parent) : NULL; + of_node_put(parent); if (pdn->parent) list_add_tail(&pdn->list, &pdn->parent->child_list); From 6315998170b481ac263075d2bd4fb4aafc133914 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Tue, 6 Sep 2022 14:17:03 +0000 Subject: [PATCH 0266/1477] powerpc/powernv: add missing of_node_put() in opal_export_attrs() [ Upstream commit 71a92e99c47900cc164620948b3863382cec4f1a ] After using 'np' returned by of_find_node_by_path(), of_node_put() need be called to decrease the refcount. Fixes: 11fe909d2362 ("powerpc/powernv: Add OPAL exports attributes to sysfs") Signed-off-by: Zheng Yongjun Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220906141703.118192-1-zhengyongjun3@huawei.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/powernv/opal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index c61c3b62c8c6..1d05c168c8fb 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -892,6 +892,7 @@ static void opal_export_attrs(void) kobj = kobject_create_and_add("exports", opal_kobj); if (!kobj) { pr_warn("kobject_create_and_add() of exports failed\n"); + of_node_put(np); return; } From 7ae8bed9087a904201ac39b159ef4b1947049465 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 30 Aug 2022 15:37:05 +0200 Subject: [PATCH 0267/1477] x86/hyperv: Fix 'struct hv_enlightened_vmcs' definition [ Upstream commit ea9da788a61e47e7ab9cbad397453e51cd82ac0d ] Section 1.9 of TLFS v6.0b says: "All structures are padded in such a way that fields are aligned naturally (that is, an 8-byte field is aligned to an offset of 8 bytes and so on)". 'struct enlightened_vmcs' has a glitch: ... 
struct { u32 nested_flush_hypercall:1; /* 836: 0 4 */ u32 msr_bitmap:1; /* 836: 1 4 */ u32 reserved:30; /* 836: 2 4 */ } hv_enlightenments_control; /* 836 4 */ u32 hv_vp_id; /* 840 4 */ u64 hv_vm_id; /* 844 8 */ u64 partition_assist_page; /* 852 8 */ ... And the observed values in 'partition_assist_page' make no sense at all. Fix the layout by padding the structure properly. Fixes: 68d1eb72ee99 ("x86/hyper-v: define struct hv_enlightened_vmcs and clean field bits") Reviewed-by: Maxim Levitsky Reviewed-by: Michael Kelley Signed-off-by: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20220830133737.1539624-2-vkuznets@redhat.com Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/include/asm/hyperv-tlfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 0ed20e8bba9e..ae7192b75136 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -474,7 +474,7 @@ struct hv_enlightened_vmcs { u64 guest_rip; u32 hv_clean_fields; - u32 hv_padding_32; + u32 padding32_1; u32 hv_synthetic_controls; struct { u32 nested_flush_hypercall:1; @@ -482,7 +482,7 @@ struct hv_enlightened_vmcs { u32 reserved:30; } __packed hv_enlightenments_control; u32 hv_vp_id; - + u32 padding32_2; u64 hv_vm_id; u64 partition_assist_page; u64 padding64_4[4]; From 2bde4e1e4f017ac6acebab28e6440af9a8c9cc77 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Sep 2022 11:41:02 +1000 Subject: [PATCH 0268/1477] powerpc/64s: Fix GENERIC_CPU build flags for PPC970 / G5 [ Upstream commit 58ec7f06b74e0d6e76c4110afce367c8b5f0837d ] Big-endian GENERIC_CPU supports 970, but builds with -mcpu=power5. POWER5 is ISA v2.02 whereas 970 is v2.01 plus Altivec. 2.02 added the popcntb instruction which a compiler might use. Use -mcpu=power4. 
Fixes: 471d7ff8b51b ("powerpc/64s: Remove POWER4 support") Signed-off-by: Nicholas Piggin Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220921014103.587954-1-npiggin@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 59175651f0b9..612254141296 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -153,7 +153,7 @@ CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power8 CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power9,-mtune=power8) else CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) -CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) +CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4 endif else ifdef CONFIG_PPC_BOOK3E_64 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 From 2d285164fbe41a5f8bb1d46a18685b5c4c870870 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 2 Sep 2022 23:21:02 +0200 Subject: [PATCH 0269/1477] powerpc: Fix SPE Power ISA properties for e500v1 platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 37b9345ce7f4ab17538ea62def6f6d430f091355 ] Commit 2eb28006431c ("powerpc/e500v2: Add Power ISA properties to comply with ePAPR 1.1") introduced new include file e500v2_power_isa.dtsi and should have used it for all e500v2 platforms. But apparently it was used also for e500v1 platforms mpc8540, mpc8541, mpc8555 and mpc8560. e500v1 cores compared to e500v2 do not support double precision floating point SPE instructions. Hence power-isa-sp.fd should not be set on e500v1 platforms, which is in e500v2_power_isa.dtsi include file. Fix this issue by introducing a new e500v1_power_isa.dtsi include file and use it in all e500v1 device tree files. 
Fixes: 2eb28006431c ("powerpc/e500v2: Add Power ISA properties to comply with ePAPR 1.1") Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220902212103.22534-1-pali@kernel.org Signed-off-by: Sasha Levin --- .../boot/dts/fsl/e500v1_power_isa.dtsi | 51 +++++++++++++++++++ arch/powerpc/boot/dts/fsl/mpc8540ads.dts | 2 +- arch/powerpc/boot/dts/fsl/mpc8541cds.dts | 2 +- arch/powerpc/boot/dts/fsl/mpc8555cds.dts | 2 +- arch/powerpc/boot/dts/fsl/mpc8560ads.dts | 2 +- 5 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi diff --git a/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi new file mode 100644 index 000000000000..7e2a90cde72e --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi @@ -0,0 +1,51 @@ +/* + * e500v1 Power ISA Device Tree Source (include) + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. 
+ * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/ { + cpus { + power-isa-version = "2.03"; + power-isa-b; // Base + power-isa-e; // Embedded + power-isa-atb; // Alternate Time Base + power-isa-cs; // Cache Specification + power-isa-e.le; // Embedded.Little-Endian + power-isa-e.pm; // Embedded.Performance Monitor + power-isa-ecl; // Embedded Cache Locking + power-isa-mmc; // Memory Coherence + power-isa-sp; // Signal Processing Engine + power-isa-sp.fs; // SPE.Embedded Float Scalar Single + power-isa-sp.fv; // SPE.Embedded Float Vector + mmu-type = "power-embedded"; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/mpc8540ads.dts b/arch/powerpc/boot/dts/fsl/mpc8540ads.dts index 18a885130538..e03ae130162b 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8540ads.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8540ads.dts @@ -7,7 +7,7 @@ /dts-v1/; -/include/ "e500v2_power_isa.dtsi" +/include/ "e500v1_power_isa.dtsi" / { model = "MPC8540ADS"; diff --git a/arch/powerpc/boot/dts/fsl/mpc8541cds.dts b/arch/powerpc/boot/dts/fsl/mpc8541cds.dts index ac381e7b1c60..a2a6c5cf852e 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8541cds.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8541cds.dts @@ -7,7 +7,7 @@ /dts-v1/; -/include/ "e500v2_power_isa.dtsi" +/include/ "e500v1_power_isa.dtsi" / { 
model = "MPC8541CDS"; diff --git a/arch/powerpc/boot/dts/fsl/mpc8555cds.dts b/arch/powerpc/boot/dts/fsl/mpc8555cds.dts index 9f58db2a7e66..901b6ff06dfb 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8555cds.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8555cds.dts @@ -7,7 +7,7 @@ /dts-v1/; -/include/ "e500v2_power_isa.dtsi" +/include/ "e500v1_power_isa.dtsi" / { model = "MPC8555CDS"; diff --git a/arch/powerpc/boot/dts/fsl/mpc8560ads.dts b/arch/powerpc/boot/dts/fsl/mpc8560ads.dts index a24722ccaebf..c2f9aea78b29 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8560ads.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8560ads.dts @@ -7,7 +7,7 @@ /dts-v1/; -/include/ "e500v2_power_isa.dtsi" +/include/ "e500v1_power_isa.dtsi" / { model = "MPC8560ADS"; From 25f13424737232d5ef5337916dac9fe303b5b0c6 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 25 Jul 2022 12:09:28 +0800 Subject: [PATCH 0270/1477] crypto: sahara - don't sleep when in softirq [ Upstream commit 108586eba094b318e6a831f977f4ddcc403a15da ] Function of sahara_aes_crypt maybe could be called by function of crypto_skcipher_encrypt during the rx softirq, so it is not allowed to use mutex lock. 
Fixes: c0c3c89ae347 ("crypto: sahara - replace tasklets with...") Signed-off-by: Zhengchao Shao Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/sahara.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index d60679c79822..2043dd061121 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -25,10 +25,10 @@ #include #include #include -#include #include #include #include +#include #define SHA_BUFFER_LEN PAGE_SIZE #define SAHARA_MAX_SHA_BLOCK_SIZE SHA256_BLOCK_SIZE @@ -195,7 +195,7 @@ struct sahara_dev { void __iomem *regs_base; struct clk *clk_ipg; struct clk *clk_ahb; - struct mutex queue_mutex; + spinlock_t queue_spinlock; struct task_struct *kthread; struct completion dma_completion; @@ -641,9 +641,9 @@ static int sahara_aes_crypt(struct skcipher_request *req, unsigned long mode) rctx->mode = mode; - mutex_lock(&dev->queue_mutex); + spin_lock_bh(&dev->queue_spinlock); err = crypto_enqueue_request(&dev->queue, &req->base); - mutex_unlock(&dev->queue_mutex); + spin_unlock_bh(&dev->queue_spinlock); wake_up_process(dev->kthread); @@ -1042,10 +1042,10 @@ static int sahara_queue_manage(void *data) do { __set_current_state(TASK_INTERRUPTIBLE); - mutex_lock(&dev->queue_mutex); + spin_lock_bh(&dev->queue_spinlock); backlog = crypto_get_backlog(&dev->queue); async_req = crypto_dequeue_request(&dev->queue); - mutex_unlock(&dev->queue_mutex); + spin_unlock_bh(&dev->queue_spinlock); if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -1091,9 +1091,9 @@ static int sahara_sha_enqueue(struct ahash_request *req, int last) rctx->first = 1; } - mutex_lock(&dev->queue_mutex); + spin_lock_bh(&dev->queue_spinlock); ret = crypto_enqueue_request(&dev->queue, &req->base); - mutex_unlock(&dev->queue_mutex); + spin_unlock_bh(&dev->queue_spinlock); wake_up_process(dev->kthread); @@ -1454,7 +1454,7 @@ static int sahara_probe(struct platform_device *pdev) 
crypto_init_queue(&dev->queue, SAHARA_QUEUE_LENGTH); - mutex_init(&dev->queue_mutex); + spin_lock_init(&dev->queue_spinlock); dev_ptr = dev; From d88b88514ef28515ccfa1f1787c2aedef75a79dd Mon Sep 17 00:00:00 2001 From: Ye Weihua Date: Thu, 28 Jul 2022 10:07:58 +0800 Subject: [PATCH 0271/1477] crypto: hisilicon/zip - fix mismatch in get/set sgl_sge_nr [ Upstream commit d74f9340097a881869c4c22ca376654cc2516ecc ] KASAN reported this Bug: [17619.659757] BUG: KASAN: global-out-of-bounds in param_get_int+0x34/0x60 [17619.673193] Read of size 4 at addr fffff01332d7ed00 by task read_all/1507958 ... [17619.698934] The buggy address belongs to the variable: [17619.708371] sgl_sge_nr+0x0/0xffffffffffffa300 [hisi_zip] There is a mismatch in hisi_zip when get/set the variable sgl_sge_nr. The type of sgl_sge_nr is u16, and get/set sgl_sge_nr by param_get/set_int. Replacing param_get/set_int to param_get/set_ushort can fix this bug. Fixes: f081fda293ffb ("crypto: hisilicon - add sgl_sge_nr module param for zip") Signed-off-by: Ye Weihua Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/hisilicon/zip/zip_crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 08b4660b014c..5db7cdea994a 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -107,12 +107,12 @@ static int sgl_sge_nr_set(const char *val, const struct kernel_param *kp) if (ret || n == 0 || n > HISI_ACC_SGL_SGE_NR_MAX) return -EINVAL; - return param_set_int(val, kp); + return param_set_ushort(val, kp); } static const struct kernel_param_ops sgl_sge_nr_ops = { .set = sgl_sge_nr_set, - .get = param_get_int, + .get = param_get_ushort, }; static u16 sgl_sge_nr = HZIP_SGL_SGE_NR; From ab2485eb5dfab4519a6551611a1dbd12d17b518d Mon Sep 17 00:00:00 2001 From: Kshitiz Varshney Date: Mon, 22 Aug 2022 13:19:03 +0200 Subject: [PATCH 0272/1477] 
hwrng: imx-rngc - Moving IRQ handler registering after imx_rngc_irq_mask_clear() [ Upstream commit 10a2199caf437e893d9027d97700b3c6010048b7 ] Issue: While servicing interrupt, if the IRQ happens to be because of a SEED_DONE due to a previous boot stage, you end up completing the completion prematurely, hence causing kernel to crash while booting. Fix: Moving IRQ handler registering after imx_rngc_irq_mask_clear() Fixes: 1d5449445bd0 (hwrng: mx-rngc - add a driver for Freescale RNGC) Signed-off-by: Kshitiz Varshney Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/char/hw_random/imx-rngc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 61c844baf26e..9b182e5bfa87 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -272,13 +272,6 @@ static int imx_rngc_probe(struct platform_device *pdev) goto err; } - ret = devm_request_irq(&pdev->dev, - irq, imx_rngc_irq, 0, pdev->name, (void *)rngc); - if (ret) { - dev_err(rngc->dev, "Can't get interrupt working.\n"); - goto err; - } - init_completion(&rngc->rng_op_done); rngc->rng.name = pdev->name; @@ -292,6 +285,13 @@ static int imx_rngc_probe(struct platform_device *pdev) imx_rngc_irq_mask_clear(rngc); + ret = devm_request_irq(&pdev->dev, + irq, imx_rngc_irq, 0, pdev->name, (void *)rngc); + if (ret) { + dev_err(rngc->dev, "Can't get interrupt working.\n"); + return ret; + } + if (self_test) { ret = imx_rngc_self_test(rngc); if (ret) { From cfde58a8e41f4b5a1015d6a7e289fe986594df37 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 1 Sep 2022 16:57:36 -0400 Subject: [PATCH 0273/1477] cgroup/cpuset: Enable update_tasks_cpumask() on top_cpuset [ Upstream commit ec5fbdfb99d18482619ac42605cb80fbb56068ee ] Previously, update_tasks_cpumask() is not supposed to be called with top cpuset. 
With cpuset partition that takes CPUs away from the top cpuset, adjusting the cpus_mask of the tasks in the top cpuset is necessary. Percpu kthreads, however, are ignored. Fixes: ee8dde0cd2ce ("cpuset: Add new v2 cpuset.sched.partition flag") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/cgroup/cpuset.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index b7830f1f1f3a..43270b07b2e0 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1059,10 +1060,18 @@ static void update_tasks_cpumask(struct cpuset *cs) { struct css_task_iter it; struct task_struct *task; + bool top_cs = cs == &top_cpuset; css_task_iter_start(&cs->css, 0, &it); - while ((task = css_task_iter_next(&it))) + while ((task = css_task_iter_next(&it))) { + /* + * Percpu kthreads in top_cpuset are ignored + */ + if (top_cs && (task->flags & PF_KTHREAD) && + kthread_is_per_cpu(task)) + continue; set_cpus_allowed_ptr(task, cs->effective_cpus); + } css_task_iter_end(&it); } @@ -2016,12 +2025,7 @@ static int update_prstate(struct cpuset *cs, int new_prs) update_flag(CS_CPU_EXCLUSIVE, cs, 0); } - /* - * Update cpumask of parent's tasks except when it is the top - * cpuset as some system daemons cannot be mapped to other CPUs. - */ - if (parent != &top_cpuset) - update_tasks_cpumask(parent); + update_tasks_cpumask(parent); if (parent->child_ecpus_count) update_sibling_cpumasks(parent, cs, &tmpmask); From 2fee0dbfaeaaa4bda04279ce772c4572b1429d04 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2022 17:32:39 +0300 Subject: [PATCH 0274/1477] iommu/omap: Fix buffer overflow in debugfs [ Upstream commit 184233a5202786b20220acd2d04ddf909ef18f29 ] There are two issues here: 1) The "len" variable needs to be checked before the very first write. 
Otherwise if omap2_iommu_dump_ctx() with "bytes" less than 32 it is a buffer overflow. 2) The snprintf() function returns the number of bytes that *would* have been copied if there were enough space. But we want to know the number of bytes which were *actually* copied so use scnprintf() instead. Fixes: bd4396f09a4a ("iommu/omap: Consolidate OMAP IOMMU modules") Signed-off-by: Dan Carpenter Reviewed-by: Robin Murphy Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/YuvYh1JbE3v+abd5@kili Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/omap-iommu-debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index a99afb5d9011..259f65291d90 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -32,12 +32,12 @@ static inline bool is_omap_iommu_detached(struct omap_iommu *obj) ssize_t bytes; \ const char *str = "%20s: %08x\n"; \ const int maxcol = 32; \ - bytes = snprintf(p, maxcol, str, __stringify(name), \ + if (len < maxcol) \ + goto out; \ + bytes = scnprintf(p, maxcol, str, __stringify(name), \ iommu_read_reg(obj, MMU_##name)); \ p += bytes; \ len -= bytes; \ - if (len < maxcol) \ - goto out; \ } while (0) static ssize_t From a1354bdd191d533211b7cb723aa76a66f516f197 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Wed, 31 Aug 2022 19:37:06 +0100 Subject: [PATCH 0275/1477] crypto: akcipher - default implementation for setting a private key [ Upstream commit bc155c6c188c2f0c5749993b1405673d25a80389 ] Changes from v1: * removed the default implementation from set_pub_key: it is assumed that an implementation must always have this callback defined as there are no use case for an algorithm, which doesn't need a public key Many akcipher implementations (like ECDSA) support only signature verifications, so they don't have all callbacks defined. 
Commit 78a0324f4a53 ("crypto: akcipher - default implementations for request callbacks") introduced default callbacks for sign/verify operations, which just return an error code. However, these are not enough, because before calling sign the caller would likely call set_priv_key first on the instantiated transform (as the in-kernel testmgr does). This function does not have a default stub, so the kernel crashes, when trying to set a private key on an akcipher, which doesn't support signature generation. I've noticed this, when trying to add a KAT vector for ECDSA signature to the testmgr. With this patch the testmgr returns an error in dmesg (as it should) instead of crashing the kernel NULL ptr dereference. Fixes: 78a0324f4a53 ("crypto: akcipher - default implementations for request callbacks") Signed-off-by: Ignat Korchagin Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/akcipher.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crypto/akcipher.c b/crypto/akcipher.c index f866085c8a4a..ab975a420e1e 100644 --- a/crypto/akcipher.c +++ b/crypto/akcipher.c @@ -120,6 +120,12 @@ static int akcipher_default_op(struct akcipher_request *req) return -ENOSYS; } +static int akcipher_default_set_key(struct crypto_akcipher *tfm, + const void *key, unsigned int keylen) +{ + return -ENOSYS; +} + int crypto_register_akcipher(struct akcipher_alg *alg) { struct crypto_alg *base = &alg->base; @@ -132,6 +138,8 @@ int crypto_register_akcipher(struct akcipher_alg *alg) alg->encrypt = akcipher_default_op; if (!alg->decrypt) alg->decrypt = akcipher_default_op; + if (!alg->set_priv_key) + alg->set_priv_key = akcipher_default_set_key; akcipher_prepare_alg(alg); return crypto_register_alg(base); From d33935e666043f560abdd040a84f03d6d3868c89 Mon Sep 17 00:00:00 2001 From: Koba Ko Date: Thu, 1 Sep 2022 22:47:12 +0800 Subject: [PATCH 0276/1477] crypto: ccp - Release dma channels before dmaengine unrgister [ Upstream commit 68dbe80f5b510c66c800b9e8055235c5b07e37d1 ] A 
warning is shown during shutdown, __dma_async_device_channel_unregister called while 2 clients hold a reference WARNING: CPU: 15 PID: 1 at drivers/dma/dmaengine.c:1110 __dma_async_device_channel_unregister+0xb7/0xc0 Call dma_release_channel for occupied channels before dma_async_device_unregister. Fixes: 54cce8ecb925 ("crypto: ccp - ccp_dmaengine_unregister release dma channels") Reported-by: kernel test robot Signed-off-by: Koba Ko Acked-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/ccp/ccp-dmaengine.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index b3eea329f840..b9299defb431 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -642,6 +642,10 @@ static void ccp_dma_release(struct ccp_device *ccp) for (i = 0; i < ccp->cmd_q_count; i++) { chan = ccp->ccp_dma_chan + i; dma_chan = &chan->dma_chan; + + if (dma_chan->client_count) + dma_release_channel(dma_chan); + tasklet_kill(&chan->cleanup_tasklet); list_del_rcu(&dma_chan->device_node); } @@ -767,8 +771,8 @@ void ccp_dmaengine_unregister(struct ccp_device *ccp) if (!dmaengine) return; - dma_async_device_unregister(dma_dev); ccp_dma_release(ccp); + dma_async_device_unregister(dma_dev); kmem_cache_destroy(ccp->dma_desc_cache); kmem_cache_destroy(ccp->dma_cmd_cache); From 8a4ed09ed816a434c942fc7026651a3b99680612 Mon Sep 17 00:00:00 2001 From: Peter Harliman Liem Date: Tue, 6 Sep 2022 10:51:28 +0800 Subject: [PATCH 0277/1477] crypto: inside-secure - Change swab to swab32 [ Upstream commit 664593407e936b6438fbfaaf98876910fd31cf9a ] The use of swab() is causing failures in 64-bit arch, as it translates to __swab64() instead of the intended __swab32(). It eventually causes wrong results in xcbcmac & cmac algo.
Fixes: 78cf1c8bfcb8 ("crypto: inside-secure - Move ipad/opad into safexcel_context") Signed-off-by: Peter Harliman Liem Acked-by: Antoine Tenart Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/inside-secure/safexcel_hash.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 56d5ccb5cc00..1c9af02eb63b 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -381,7 +381,7 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring, u32 x; x = ipad[i] ^ ipad[i + 4]; - cache[i] ^= swab(x); + cache[i] ^= swab32(x); } } cache_len = AES_BLOCK_SIZE; @@ -819,7 +819,7 @@ static int safexcel_ahash_final(struct ahash_request *areq) u32 *result = (void *)areq->result; /* K3 */ - result[i] = swab(ctx->base.ipad.word[i + 4]); + result[i] = swab32(ctx->base.ipad.word[i + 4]); } areq->result[0] ^= 0x80; // 10- padding crypto_cipher_encrypt_one(ctx->kaes, areq->result, areq->result); @@ -2104,7 +2104,7 @@ static int safexcel_xcbcmac_setkey(struct crypto_ahash *tfm, const u8 *key, crypto_cipher_encrypt_one(ctx->kaes, (u8 *)key_tmp + AES_BLOCK_SIZE, "\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3"); for (i = 0; i < 3 * AES_BLOCK_SIZE / sizeof(u32); i++) - ctx->base.ipad.word[i] = swab(key_tmp[i]); + ctx->base.ipad.word[i] = swab32(key_tmp[i]); crypto_cipher_clear_flags(ctx->kaes, CRYPTO_TFM_REQ_MASK); crypto_cipher_set_flags(ctx->kaes, crypto_ahash_get_flags(tfm) & @@ -2187,7 +2187,7 @@ static int safexcel_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, return ret; for (i = 0; i < len / sizeof(u32); i++) - ctx->base.ipad.word[i + 8] = swab(aes.key_enc[i]); + ctx->base.ipad.word[i + 8] = swab32(aes.key_enc[i]); /* precompute the CMAC key material */ crypto_cipher_clear_flags(ctx->kaes, CRYPTO_TFM_REQ_MASK); From a91af50850270554a4ff032a12eaa4c2fed0dd44 Mon Sep 
17 00:00:00 2001 From: Hui Tang Date: Fri, 5 Mar 2021 14:35:02 +0800 Subject: [PATCH 0278/1477] crypto: qat - fix use of 'dma_map_single' [ Upstream commit 7cc05071f930a631040fea16a41f9d78771edc49 ] DMA_TO_DEVICE synchronisation must be done after the last modification of the memory region by the software and before it is handed off to the device. Signed-off-by: Hui Tang Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Herbert Xu Stable-dep-of: cf5bb835b7c8 ("crypto: qat - fix DMA transfer direction") Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/qat_algs.c | 27 ++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 06abe1e2074e..8625e299d445 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -669,8 +669,8 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, int n = sg_nents(sgl); struct qat_alg_buf_list *bufl; struct qat_alg_buf_list *buflout = NULL; - dma_addr_t blp; - dma_addr_t bloutp; + dma_addr_t blp = DMA_MAPPING_ERROR; + dma_addr_t bloutp = DMA_MAPPING_ERROR; struct scatterlist *sg; size_t sz_out, sz = struct_size(bufl, bufers, n + 1); @@ -685,10 +685,6 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, for_each_sg(sgl, sg, n, i) bufl->bufers[i].addr = DMA_MAPPING_ERROR; - blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, blp))) - goto err_in; - for_each_sg(sgl, sg, n, i) { int y = sg_nctr; @@ -704,6 +700,9 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, sg_nctr++; } bufl->num_bufs = sg_nctr; + blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, blp))) + goto err_in; qat_req->buf.bl = bufl; qat_req->buf.blp = blp; qat_req->buf.sz = sz; @@ -723,9 +722,6 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, 
for_each_sg(sglout, sg, n, i) bufers[i].addr = DMA_MAPPING_ERROR; - bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, bloutp))) - goto err_out; for_each_sg(sglout, sg, n, i) { int y = sg_nctr; @@ -742,6 +738,9 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, } buflout->num_bufs = sg_nctr; buflout->num_mapped_bufs = sg_nctr; + bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, bloutp))) + goto err_out; qat_req->buf.blout = buflout; qat_req->buf.bloutp = bloutp; qat_req->buf.sz_out = sz_out; @@ -753,17 +752,21 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, return 0; err_out: + if (!dma_mapping_error(dev, bloutp)) + dma_unmap_single(dev, bloutp, sz_out, DMA_TO_DEVICE); + n = sg_nents(sglout); for (i = 0; i < n; i++) if (!dma_mapping_error(dev, buflout->bufers[i].addr)) dma_unmap_single(dev, buflout->bufers[i].addr, buflout->bufers[i].len, DMA_BIDIRECTIONAL); - if (!dma_mapping_error(dev, bloutp)) - dma_unmap_single(dev, bloutp, sz_out, DMA_TO_DEVICE); kfree(buflout); err_in: + if (!dma_mapping_error(dev, blp)) + dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); + n = sg_nents(sgl); for (i = 0; i < n; i++) if (!dma_mapping_error(dev, bufl->bufers[i].addr)) @@ -771,8 +774,6 @@ err_in: bufl->bufers[i].len, DMA_BIDIRECTIONAL); - if (!dma_mapping_error(dev, blp)) - dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); kfree(bufl); dev_err(dev, "Failed to map buf for dma\n"); From a43babc059a7a992bd9d3bb14c4caf053ee7f7a5 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:08 +0100 Subject: [PATCH 0279/1477] crypto: qat - use pre-allocated buffers in datapath [ Upstream commit e0831e7af4e03f2715de102e18e9179ec0a81562 ] In order to do DMAs, the QAT device requires that the scatterlist structures are mapped and translated into a format that the firmware can understand. 
This is defined as the composition of a scatter gather list (SGL) descriptor header, the struct qat_alg_buf_list, plus a variable number of flat buffer descriptors, the struct qat_alg_buf. The allocation and mapping of these data structures is done each time a request is received from the skcipher and aead APIs. In an OOM situation, this behaviour might lead to a dead-lock if an allocation fails. Based on the conversation in [1], increase the size of the aead and skcipher request contexts to include an SGL descriptor that can handle a maximum of 4 flat buffers. If requests exceed 4 entries buffers, memory is allocated dynamically. [1] https://lore.kernel.org/linux-crypto/20200722072932.GA27544@gondor.apana.org.au/ Cc: stable@vger.kernel.org Fixes: d370cec32194 ("crypto: qat - Intel(R) QAT crypto interface") Reported-by: Mikulas Patocka Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu Stable-dep-of: cf5bb835b7c8 ("crypto: qat - fix DMA transfer direction") Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/qat_algs.c | 64 +++++++++++++--------- drivers/crypto/qat/qat_common/qat_crypto.h | 24 ++++++++ 2 files changed, 61 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 8625e299d445..2e2c2ac53609 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -34,19 +34,6 @@ static DEFINE_MUTEX(algs_lock); static unsigned int active_devs; -struct qat_alg_buf { - u32 len; - u32 resrvd; - u64 addr; -} __packed; - -struct qat_alg_buf_list { - u64 resrvd; - u32 num_bufs; - u32 num_mapped_bufs; - struct qat_alg_buf bufers[]; -} __packed __aligned(64); - /* Common content descriptor */ struct qat_alg_cd { union { @@ -644,7 +631,10 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst, bl->bufers[i].len, DMA_BIDIRECTIONAL); dma_unmap_single(dev, blp, sz, 
DMA_TO_DEVICE); - kfree(bl); + + if (!qat_req->buf.sgl_src_valid) + kfree(bl); + if (blp != blpout) { /* If out of place operation dma unmap only data */ int bufless = blout->num_bufs - blout->num_mapped_bufs; @@ -655,7 +645,9 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst, DMA_BIDIRECTIONAL); } dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); - kfree(blout); + + if (!qat_req->buf.sgl_dst_valid) + kfree(blout); } } @@ -672,15 +664,24 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, dma_addr_t blp = DMA_MAPPING_ERROR; dma_addr_t bloutp = DMA_MAPPING_ERROR; struct scatterlist *sg; - size_t sz_out, sz = struct_size(bufl, bufers, n + 1); + size_t sz_out, sz = struct_size(bufl, bufers, n); + int node = dev_to_node(&GET_DEV(inst->accel_dev)); if (unlikely(!n)) return -EINVAL; - bufl = kzalloc_node(sz, GFP_ATOMIC, - dev_to_node(&GET_DEV(inst->accel_dev))); - if (unlikely(!bufl)) - return -ENOMEM; + qat_req->buf.sgl_src_valid = false; + qat_req->buf.sgl_dst_valid = false; + + if (n > QAT_MAX_BUFF_DESC) { + bufl = kzalloc_node(sz, GFP_ATOMIC, node); + if (unlikely(!bufl)) + return -ENOMEM; + } else { + bufl = &qat_req->buf.sgl_src.sgl_hdr; + memset(bufl, 0, sizeof(struct qat_alg_buf_list)); + qat_req->buf.sgl_src_valid = true; + } for_each_sg(sgl, sg, n, i) bufl->bufers[i].addr = DMA_MAPPING_ERROR; @@ -711,12 +712,18 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, struct qat_alg_buf *bufers; n = sg_nents(sglout); - sz_out = struct_size(buflout, bufers, n + 1); + sz_out = struct_size(buflout, bufers, n); sg_nctr = 0; - buflout = kzalloc_node(sz_out, GFP_ATOMIC, - dev_to_node(&GET_DEV(inst->accel_dev))); - if (unlikely(!buflout)) - goto err_in; + + if (n > QAT_MAX_BUFF_DESC) { + buflout = kzalloc_node(sz_out, GFP_ATOMIC, node); + if (unlikely(!buflout)) + goto err_in; + } else { + buflout = &qat_req->buf.sgl_dst.sgl_hdr; + memset(buflout, 0, sizeof(struct qat_alg_buf_list)); + qat_req->buf.sgl_dst_valid = true; + 
} bufers = buflout->bufers; for_each_sg(sglout, sg, n, i) @@ -761,7 +768,9 @@ err_out: dma_unmap_single(dev, buflout->bufers[i].addr, buflout->bufers[i].len, DMA_BIDIRECTIONAL); - kfree(buflout); + + if (!qat_req->buf.sgl_dst_valid) + kfree(buflout); err_in: if (!dma_mapping_error(dev, blp)) @@ -774,7 +783,8 @@ err_in: bufl->bufers[i].len, DMA_BIDIRECTIONAL); - kfree(bufl); + if (!qat_req->buf.sgl_src_valid) + kfree(bufl); dev_err(dev, "Failed to map buf for dma\n"); return -ENOMEM; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index 12682d1e9f5f..5f9328201ba4 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -20,6 +20,26 @@ struct qat_crypto_instance { atomic_t refctr; }; +#define QAT_MAX_BUFF_DESC 4 + +struct qat_alg_buf { + u32 len; + u32 resrvd; + u64 addr; +} __packed; + +struct qat_alg_buf_list { + u64 resrvd; + u32 num_bufs; + u32 num_mapped_bufs; + struct qat_alg_buf bufers[]; +} __packed; + +struct qat_alg_fixed_buf_list { + struct qat_alg_buf_list sgl_hdr; + struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC]; +} __packed __aligned(64); + struct qat_crypto_request_buffs { struct qat_alg_buf_list *bl; dma_addr_t blp; @@ -27,6 +47,10 @@ struct qat_crypto_request_buffs { dma_addr_t bloutp; size_t sz; size_t sz_out; + bool sgl_src_valid; + bool sgl_dst_valid; + struct qat_alg_fixed_buf_list sgl_src; + struct qat_alg_fixed_buf_list sgl_dst; }; struct qat_crypto_request; From 426d5bc089e7731e36b514d1beca19e777a2d653 Mon Sep 17 00:00:00 2001 From: Damian Muszynski Date: Fri, 9 Sep 2022 11:49:12 +0100 Subject: [PATCH 0280/1477] crypto: qat - fix DMA transfer direction [ Upstream commit cf5bb835b7c8a5fee7f26455099cca7feb57f5e9 ] When CONFIG_DMA_API_DEBUG is selected, while running the crypto self test on the QAT crypto algorithms, the function add_dma_entry() reports a warning similar to the one below, saying that overlapping mappings are not supported. 
This occurs in tests where the input and the output scatter list point to the same buffers (i.e. two different scatter lists which point to the same chunks of memory). The logic that implements the mapping uses the flag DMA_BIDIRECTIONAL for both the input and the output scatter lists which leads to overlapped write mappings. These are not supported by the DMA layer. Fix by specifying the correct DMA transfer directions when mapping buffers. For in-place operations where the input scatter list matches the output scatter list, buffers are mapped once with DMA_BIDIRECTIONAL, otherwise input buffers are mapped using the flag DMA_TO_DEVICE and output buffers are mapped with DMA_FROM_DEVICE. Overlapping a read mapping with a write mapping is a valid case in dma-coherent devices like QAT. The function that frees and unmaps the buffers, qat_alg_free_bufl() has been changed accordingly to the changes to the mapping function. DMA-API: 4xxx 0000:06:00.0: cacheline tracking EEXIST, overlapping mappings aren't supported WARNING: CPU: 53 PID: 4362 at kernel/dma/debug.c:570 add_dma_entry+0x1e9/0x270 ... Call Trace: dma_map_page_attrs+0x82/0x2d0 ? preempt_count_add+0x6a/0xa0 qat_alg_sgl_to_bufl+0x45b/0x990 [intel_qat] qat_alg_aead_dec+0x71/0x250 [intel_qat] crypto_aead_decrypt+0x3d/0x70 test_aead_vec_cfg+0x649/0x810 ? number+0x310/0x3a0 ? vsnprintf+0x2a3/0x550 ? scnprintf+0x42/0x70 ? valid_sg_divisions.constprop.0+0x86/0xa0 ? test_aead_vec+0xdf/0x120 test_aead_vec+0xdf/0x120 alg_test_aead+0x185/0x400 alg_test+0x3d8/0x500 ? crypto_acomp_scomp_free_ctx+0x30/0x30 ? __schedule+0x32a/0x12a0 ? ttwu_queue_wakelist+0xbf/0x110 ? _raw_spin_unlock_irqrestore+0x23/0x40 ? try_to_wake_up+0x83/0x570 ? _raw_spin_unlock_irqrestore+0x23/0x40 ? __set_cpus_allowed_ptr_locked+0xea/0x1b0 ? crypto_acomp_scomp_free_ctx+0x30/0x30 cryptomgr_test+0x27/0x50 kthread+0xe6/0x110 ? 
kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: d370cec ("crypto: qat - Intel(R) QAT crypto interface") Link: https://lore.kernel.org/linux-crypto/20220223080400.139367-1-gilad@benyossef.com/ Signed-off-by: Damian Muszynski Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/qat_algs.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 2e2c2ac53609..5b71768fc0c7 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -624,11 +624,14 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst, dma_addr_t blpout = qat_req->buf.bloutp; size_t sz = qat_req->buf.sz; size_t sz_out = qat_req->buf.sz_out; + int bl_dma_dir; int i; + bl_dma_dir = blp != blpout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; + for (i = 0; i < bl->num_bufs; i++) dma_unmap_single(dev, bl->bufers[i].addr, - bl->bufers[i].len, DMA_BIDIRECTIONAL); + bl->bufers[i].len, bl_dma_dir); dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); @@ -642,7 +645,7 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst, for (i = bufless; i < blout->num_bufs; i++) { dma_unmap_single(dev, blout->bufers[i].addr, blout->bufers[i].len, - DMA_BIDIRECTIONAL); + DMA_FROM_DEVICE); } dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); @@ -666,6 +669,7 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, struct scatterlist *sg; size_t sz_out, sz = struct_size(bufl, bufers, n); int node = dev_to_node(&GET_DEV(inst->accel_dev)); + int bufl_dma_dir; if (unlikely(!n)) return -EINVAL; @@ -683,6 +687,8 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, qat_req->buf.sgl_src_valid = true; } + bufl_dma_dir = sgl != sglout ? 
DMA_TO_DEVICE : DMA_BIDIRECTIONAL; + for_each_sg(sgl, sg, n, i) bufl->bufers[i].addr = DMA_MAPPING_ERROR; @@ -694,7 +700,7 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg), sg->length, - DMA_BIDIRECTIONAL); + bufl_dma_dir); bufl->bufers[y].len = sg->length; if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr))) goto err_in; @@ -737,7 +743,7 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, bufers[y].addr = dma_map_single(dev, sg_virt(sg), sg->length, - DMA_BIDIRECTIONAL); + DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(dev, bufers[y].addr))) goto err_out; bufers[y].len = sg->length; @@ -767,7 +773,7 @@ err_out: if (!dma_mapping_error(dev, buflout->bufers[i].addr)) dma_unmap_single(dev, buflout->bufers[i].addr, buflout->bufers[i].len, - DMA_BIDIRECTIONAL); + DMA_FROM_DEVICE); if (!qat_req->buf.sgl_dst_valid) kfree(buflout); @@ -781,7 +787,7 @@ err_in: if (!dma_mapping_error(dev, bufl->bufers[i].addr)) dma_unmap_single(dev, bufl->bufers[i].addr, bufl->bufers[i].len, - DMA_BIDIRECTIONAL); + bufl_dma_dir); if (!qat_req->buf.sgl_src_valid) kfree(bufl); From 9e6ba62d418d48ba4bf2b993b62d61f2de16ad50 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 13 Sep 2022 12:47:20 +0100 Subject: [PATCH 0281/1477] iommu/iova: Fix module config properly [ Upstream commit 4f58330fcc8482aa90674e1f40f601e82f18ed4a ] IOMMU_IOVA is intended to be an optional library for users to select as and when they desire. Since it can be a module now, this means that built-in code which has chosen not to select it should not fail to link if it happens to have selected as a module by someone else. Replace IS_ENABLED() with IS_REACHABLE() to do the right thing. 
CC: Thierry Reding Reported-by: John Garry Fixes: 15bbdec3931e ("iommu: Make the iova library a module") Signed-off-by: Robin Murphy Reviewed-by: Thierry Reding Link: https://lore.kernel.org/r/548c2f683ca379aface59639a8f0cccc3a1ac050.1663069227.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- include/linux/iova.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iova.h b/include/linux/iova.h index a0637abffee8..6c19b09e9663 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -132,7 +132,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -#if IS_ENABLED(CONFIG_IOMMU_IOVA) +#if IS_REACHABLE(CONFIG_IOMMU_IOVA) int iova_cache_get(void); void iova_cache_put(void); From c6512a6f0cb19dcfc02a6bfb2cfacdeed14e6269 Mon Sep 17 00:00:00 2001 From: Yipeng Zou Date: Mon, 19 Sep 2022 20:56:28 +0800 Subject: [PATCH 0282/1477] tracing: kprobe: Fix kprobe event gen test module on exit [ Upstream commit ac48e189527fae87253ef2bf58892e782fb36874 ] Correct gen_kretprobe_test clr event para on module exit. This will make it can't to delete. 
Link: https://lkml.kernel.org/r/20220919125629.238242-2-zouyipeng@huawei.com Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module") Signed-off-by: Yipeng Zou Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/kprobe_event_gen_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index 18b0f1cbb947..e023154be0f8 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -206,7 +206,7 @@ static void __exit kprobe_event_gen_test_exit(void) WARN_ON(kprobe_event_delete("gen_kprobe_test")); /* Disable the event or you can't remove it */ - WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, + WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, "kprobes", "gen_kretprobe_test", false)); From 8d76dd508093f7af6befe2142133b5572e316d56 Mon Sep 17 00:00:00 2001 From: Yipeng Zou Date: Mon, 19 Sep 2022 20:56:29 +0800 Subject: [PATCH 0283/1477] tracing: kprobe: Make gen test module work in arm and riscv [ Upstream commit d8ef45d66c01425ff748e13ef7dd1da7a91cc93c ] For now, this selftest module can only work in x86 because of the kprobe cmd was fixed use of x86 registers. This patch adapted to register names under arm and riscv, So that this module can be worked on those platform. 
Link: https://lkml.kernel.org/r/20220919125629.238242-3-zouyipeng@huawei.com Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module") Signed-off-by: Yipeng Zou Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/kprobe_event_gen_test.c | 47 +++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index e023154be0f8..80e04a1e1977 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -35,6 +35,45 @@ static struct trace_event_file *gen_kprobe_test; static struct trace_event_file *gen_kretprobe_test; +#define KPROBE_GEN_TEST_FUNC "do_sys_open" + +/* X86 */ +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_32) +#define KPROBE_GEN_TEST_ARG0 "dfd=%ax" +#define KPROBE_GEN_TEST_ARG1 "filename=%dx" +#define KPROBE_GEN_TEST_ARG2 "flags=%cx" +#define KPROBE_GEN_TEST_ARG3 "mode=+4($stack)" + +/* ARM64 */ +#elif defined(CONFIG_ARM64) +#define KPROBE_GEN_TEST_ARG0 "dfd=%x0" +#define KPROBE_GEN_TEST_ARG1 "filename=%x1" +#define KPROBE_GEN_TEST_ARG2 "flags=%x2" +#define KPROBE_GEN_TEST_ARG3 "mode=%x3" + +/* ARM */ +#elif defined(CONFIG_ARM) +#define KPROBE_GEN_TEST_ARG0 "dfd=%r0" +#define KPROBE_GEN_TEST_ARG1 "filename=%r1" +#define KPROBE_GEN_TEST_ARG2 "flags=%r2" +#define KPROBE_GEN_TEST_ARG3 "mode=%r3" + +/* RISCV */ +#elif defined(CONFIG_RISCV) +#define KPROBE_GEN_TEST_ARG0 "dfd=%a0" +#define KPROBE_GEN_TEST_ARG1 "filename=%a1" +#define KPROBE_GEN_TEST_ARG2 "flags=%a2" +#define KPROBE_GEN_TEST_ARG3 "mode=%a3" + +/* others */ +#else +#define KPROBE_GEN_TEST_ARG0 NULL +#define KPROBE_GEN_TEST_ARG1 NULL +#define KPROBE_GEN_TEST_ARG2 NULL +#define KPROBE_GEN_TEST_ARG3 NULL +#endif + + /* * Test to make sure we can create a kprobe event, then add more * fields. 
@@ -58,14 +97,14 @@ static int __init test_gen_kprobe_cmd(void) * fields. */ ret = kprobe_event_gen_cmd_start(&cmd, "gen_kprobe_test", - "do_sys_open", - "dfd=%ax", "filename=%dx"); + KPROBE_GEN_TEST_FUNC, + KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1); if (ret) goto free; /* Use kprobe_event_add_fields to add the rest of the fields */ - ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)"); + ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3); if (ret) goto free; @@ -128,7 +167,7 @@ static int __init test_gen_kretprobe_cmd(void) * Define the kretprobe event. */ ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test", - "do_sys_open", + KPROBE_GEN_TEST_FUNC, "$retval"); if (ret) goto free; From 6d1aef17e7f26d315efd0cf46f4340bb932afac1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 7 Aug 2022 09:48:09 +0900 Subject: [PATCH 0284/1477] kbuild: remove the target in signal traps when interrupted [ Upstream commit a7f3257da8a86b96fb9bf1bba40ae0bbd7f1885a ] When receiving some signal, GNU Make automatically deletes the target if it has already been changed by the interrupted recipe. If the target is possibly incomplete due to interruption, it must be deleted so that it will be remade from scratch on the next run of make. Otherwise, the target would remain corrupted permanently because its timestamp had already been updated. Thanks to this behavior of Make, you can stop the build any time by pressing Ctrl-C, and just run 'make' to resume it. Kbuild also relies on this feature, but it is equivalently important for any build systems that make decisions based on timestamps (if you want to support Ctrl-C reliably). However, this does not always work as claimed; Make immediately dies with Ctrl-C if its stderr goes into a pipe. 
[Test Makefile] foo: echo hello > $@ sleep 3 echo world >> $@ [Test Result] $ make # hit Ctrl-C echo hello > foo sleep 3 ^Cmake: *** Deleting file 'foo' make: *** [Makefile:3: foo] Interrupt $ make 2>&1 | cat # hit Ctrl-C echo hello > foo sleep 3 ^C$ # 'foo' is often left-over The reason is that SIGINT is sent to the entire process group. In this example, SIGINT kills 'cat', and 'make' writes the message to the closed pipe, then dies with SIGPIPE before cleaning the target. A typical bad scenario (as reported by [1], [2]) is to save build log by using the 'tee' command: $ make 2>&1 | tee log This can be problematic for any build systems based on Make, so I hope it will be fixed in GNU Make. The maintainer of GNU Make stated this is a long-standing issue and difficult to fix [3]. It has not been fixed yet as of writing. So, we cannot rely on Make cleaning the target. We can do it by ourselves, in signal traps. As far as I understand, Make takes care of SIGHUP, SIGINT, SIGQUIT, and SIGTERM for the target removal. I added the traps for them, and also for SIGPIPE just in case cmd_* rule prints something to stdout or stderr (but I did not observe an actual case where SIGPIPE was triggered). [Note 1] The trap handler might be worth explaining. rm -f $@; trap - $(sig); kill -s $(sig) $$ This lets the shell kill itself by the signal it caught, so the parent process can tell the child has exited on the signal. Generally, this is a proper manner for handling signals, in case the calling program (like Bash) may monitor WIFSIGNALED() and WTERMSIG() for WCE although this may not be a big deal here because GNU Make handles SIGHUP, SIGINT, SIGQUIT in WUE and SIGTERM in IUE. IUE - Immediate Unconditional Exit WUE - Wait and Unconditional Exit WCE - Wait and Cooperative Exit For details, see "Proper handling of SIGINT/SIGQUIT" [4]. [Note 2] Reverting 392885ee82d3 ("kbuild: let fixdep directly write to .*.cmd files") would directly address [1], but it only saves if_changed_dep.
As reported in [2], all commands that use redirection can potentially leave an empty (i.e. broken) target. [Note 3] Another (even safer) approach might be to always write to a temporary file, and rename it to $@ at the end of the recipe. > $(tmp-target) mv $(tmp-target) $@ It would require a lot of Makefile changes, and result in ugly code, so I did not take it. [Note 4] A little more thoughts about a pattern rule with multiple targets (or a grouped target). %.x %.y: %.z When interrupted, GNU Make deletes both %.x and %.y, while this solution only deletes $@. Probably, this is not a big deal. The next run of make will execute the rule again to create $@ along with the other files. [1]: https://lore.kernel.org/all/YLeot94yAaM4xbMY@gmail.com/ [2]: https://lore.kernel.org/all/20220510221333.2770571-1-robh@kernel.org/ [3]: https://lists.gnu.org/archive/html/help-make/2021-06/msg00001.html [4]: https://www.cons.org/cracauer/sigint.html Fixes: 392885ee82d3 ("kbuild: let fixdep directly write to .*.cmd files") Reported-by: Ingo Molnar Reported-by: Rob Herring Signed-off-by: Masahiro Yamada Tested-by: Ingo Molnar Reviewed-by: Nicolas Schier Signed-off-by: Sasha Levin --- scripts/Kbuild.include | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 0d6e11820791..25696de8114a 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -179,8 +179,29 @@ echo-cmd = $(if $($(quiet)cmd_$(1)),\ quiet_redirect := silent_redirect := exec >/dev/null; +# Delete the target on interruption +# +# GNU Make automatically deletes the target if it has already been changed by +# the interrupted recipe. So, you can safely stop the build by Ctrl-C (Make +# will delete incomplete targets), and resume it later. +# +# However, this does not work when the stderr is piped to another program, like +# $ make >&2 | tee log +# Make dies with SIGPIPE before cleaning the targets. 
+# +# To address it, we clean the target in signal traps. +# +# Make deletes the target when it catches SIGHUP, SIGINT, SIGQUIT, SIGTERM. +# So, we cover them, and also SIGPIPE just in case. +# +# Of course, this is unneeded for phony targets. +delete-on-interrupt = \ + $(if $(filter-out $(PHONY), $@), \ + $(foreach sig, HUP INT QUIT TERM PIPE, \ + trap 'rm -f $@; trap - $(sig); kill -s $(sig) $$$$' $(sig);)) + # printing commands -cmd = @set -e; $(echo-cmd) $($(quiet)redirect) $(cmd_$(1)) +cmd = @set -e; $(echo-cmd) $($(quiet)redirect) $(delete-on-interrupt) $(cmd_$(1)) ### # if_changed - execute command if any prerequisite is newer than From cdd42eb4689ba5c9ecaacbfabeb7974ab29bb527 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 16 Sep 2022 14:41:12 +0200 Subject: [PATCH 0285/1477] kbuild: rpm-pkg: fix breakage when V=1 is used [ Upstream commit 2e07005f4813a9ff6e895787e0c2d1fea859b033 ] Doing make V=1 binrpm-pkg results in: Executing(%install): /bin/sh -e /var/tmp/rpm-tmp.EgV6qJ + umask 022 + cd . + /bin/rm -rf /home/scgl/rpmbuild/BUILDROOT/kernel-6.0.0_rc5+-1.s390x + /bin/mkdir -p /home/scgl/rpmbuild/BUILDROOT + /bin/mkdir /home/scgl/rpmbuild/BUILDROOT/kernel-6.0.0_rc5+-1.s390x + mkdir -p /home/scgl/rpmbuild/BUILDROOT/kernel-6.0.0_rc5+-1.s390x/boot + make -f ./Makefile image_name + cp test -e include/generated/autoconf.h -a -e include/config/auto.conf || ( \ echo >&2; \ echo >&2 " ERROR: Kernel configuration is invalid."; \ echo >&2 " include/generated/autoconf.h or include/config/auto.conf are missing.";\ echo >&2 " Run 'make oldconfig && make prepare' on kernel src to fix it."; \ echo >&2 ; \ /bin/false) arch/s390/boot/bzImage /home/scgl/rpmbuild/BUILDROOT/kernel-6.0.0_rc5+-1.s390x/boot/vmlinuz-6.0.0-rc5+ cp: invalid option -- 'e' Try 'cp --help' for more information. error: Bad exit status from /var/tmp/rpm-tmp.EgV6qJ (%install) Because the make call to get the image name is verbose and prints additional information. 
Fixes: 993bdde94547 ("kbuild: add image_name to no-sync-config-targets") Signed-off-by: Janis Schoetterl-Glausch Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/package/mkspec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/package/mkspec b/scripts/package/mkspec index 7c477ca7dc98..951cc60e5a90 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -85,10 +85,10 @@ $S mkdir -p %{buildroot}/boot %ifarch ia64 mkdir -p %{buildroot}/boot/efi - cp \$($MAKE image_name) %{buildroot}/boot/efi/vmlinuz-$KERNELRELEASE + cp \$($MAKE -s image_name) %{buildroot}/boot/efi/vmlinuz-$KERNELRELEASE ln -s efi/vmlinuz-$KERNELRELEASE %{buildroot}/boot/ %else - cp \$($MAKE image_name) %{buildroot}/boot/vmlinuz-$KERNELRELEASE + cp \$($MAKE -s image_name) %{buildroot}/boot/vmlinuz-$KERNELRELEASE %endif $M $MAKE %{?_smp_mflags} INSTALL_MOD_PATH=%{buildroot} modules_install $MAKE %{?_smp_mflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install From 7bfa7d67735381715c98091194e81e7685f9b7db Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Sep 2022 09:43:19 +0300 Subject: [PATCH 0286/1477] crypto: marvell/octeontx - prevent integer overflows [ Upstream commit caca37cf6c749ff0303f68418cfe7b757a4e0697 ] The "code_length" value comes from the firmware file. If your firmware is untrusted realistically there is probably very little you can do to protect yourself. Still we try to limit the damage as much as possible. Also Smatch marks any data read from the filesystem as untrusted and prints warnings if it not capped correctly. The "code_length * 2" can overflow. The round_up(ucode_size, 16) + sizeof() expression can overflow too. Prevent these overflows. 
Fixes: d9110b0b01ff ("crypto: marvell - add support for OCTEON TX CPT engine") Signed-off-by: Dan Carpenter Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- .../crypto/marvell/octeontx/otx_cptpf_ucode.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c index 40b482198ebc..a765eefb18c2 100644 --- a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c @@ -286,6 +286,7 @@ static int process_tar_file(struct device *dev, struct tar_ucode_info_t *tar_info; struct otx_cpt_ucode_hdr *ucode_hdr; int ucode_type, ucode_size; + unsigned int code_length; /* * If size is less than microcode header size then don't report @@ -303,7 +304,13 @@ static int process_tar_file(struct device *dev, if (get_ucode_type(ucode_hdr, &ucode_type)) return 0; - ucode_size = ntohl(ucode_hdr->code_length) * 2; + code_length = ntohl(ucode_hdr->code_length); + if (code_length >= INT_MAX / 2) { + dev_err(dev, "Invalid code_length %u\n", code_length); + return -EINVAL; + } + + ucode_size = code_length * 2; if (!ucode_size || (size < round_up(ucode_size, 16) + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { dev_err(dev, "Ucode %s invalid size\n", filename); @@ -886,6 +893,7 @@ static int ucode_load(struct device *dev, struct otx_cpt_ucode *ucode, { struct otx_cpt_ucode_hdr *ucode_hdr; const struct firmware *fw; + unsigned int code_length; int ret; set_ucode_filename(ucode, ucode_filename); @@ -896,7 +904,13 @@ static int ucode_load(struct device *dev, struct otx_cpt_ucode *ucode, ucode_hdr = (struct otx_cpt_ucode_hdr *) fw->data; memcpy(ucode->ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ); ucode->ver_num = ucode_hdr->ver_num; - ucode->size = ntohl(ucode_hdr->code_length) * 2; + code_length = ntohl(ucode_hdr->code_length); + if (code_length >= INT_MAX / 2) { + dev_err(dev, "Ucode invalid 
code_length %u\n", code_length); + ret = -EINVAL; + goto release_fw; + } + ucode->size = code_length * 2; if (!ucode->size || (fw->size < round_up(ucode->size, 16) + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { dev_err(dev, "Ucode %s invalid size\n", ucode_filename); From 3a720eb89026c5241b8c4abb33370dc6fb565eee Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Sep 2022 09:43:27 +0300 Subject: [PATCH 0287/1477] crypto: cavium - prevent integer overflow loading firmware [ Upstream commit 2526d6bf27d15054bb0778b2f7bc6625fd934905 ] The "code_length" value comes from the firmware file. If your firmware is untrusted realistically there is probably very little you can do to protect yourself. Still we try to limit the damage as much as possible. Also Smatch marks any data read from the filesystem as untrusted and prints warnings if it not capped correctly. The "ntohl(ucode->code_length) * 2" multiplication can have an integer overflow. Fixes: 9e2c7d99941d ("crypto: cavium - Add Support for Octeon-tx CPT Engine") Signed-off-by: Dan Carpenter Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/cavium/cpt/cptpf_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c index 781949027451..d9362199423f 100644 --- a/drivers/crypto/cavium/cpt/cptpf_main.c +++ b/drivers/crypto/cavium/cpt/cptpf_main.c @@ -254,6 +254,7 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae) const struct firmware *fw_entry; struct device *dev = &cpt->pdev->dev; struct ucode_header *ucode; + unsigned int code_length; struct microcode *mcode; int j, ret = 0; @@ -264,11 +265,12 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae) ucode = (struct ucode_header *)fw_entry->data; mcode = &cpt->mcode[cpt->next_mc_idx]; memcpy(mcode->version, (u8 *)fw_entry->data, CPT_UCODE_VERSION_SZ); - mcode->code_size = 
ntohl(ucode->code_length) * 2; - if (!mcode->code_size) { + code_length = ntohl(ucode->code_length); + if (code_length == 0 || code_length >= INT_MAX / 2) { ret = -EINVAL; goto fw_release; } + mcode->code_size = code_length * 2; mcode->is_ae = is_ae; mcode->core_mask = 0ULL; From dce07e87ee1ec98f29fdd684020c7f6e387652eb Mon Sep 17 00:00:00 2001 From: Vincent Knecht Date: Thu, 11 Aug 2022 12:50:14 +0200 Subject: [PATCH 0288/1477] thermal/drivers/qcom/tsens-v0_1: Fix MSM8939 fourth sensor hw_id [ Upstream commit b0c883e900702f408d62cf92b0ef01303ed69be9 ] Reading temperature from this sensor fails with 'Invalid argument'. Looking at old vendor dts [1], its hw_id should be 3 instead of 4. Change this hw_id accordingly. [1] https://github.com/msm8916-mainline/android_kernel_qcom_msm8916/blob/master/arch/arm/boot/dts/qcom/msm8939-common.dtsi#L511 Fixes: 332bc8ebab2c ("thermal: qcom: tsens-v0_1: Add support for MSM8939") Signed-off-by: Vincent Knecht Reviewed-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Reviewed-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20220811105014.7194-1-vincent.knecht@mailoo.org Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/thermal/qcom/tsens-v0_1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/qcom/tsens-v0_1.c b/drivers/thermal/qcom/tsens-v0_1.c index 4ffa2e2c0145..9b8ba429a304 100644 --- a/drivers/thermal/qcom/tsens-v0_1.c +++ b/drivers/thermal/qcom/tsens-v0_1.c @@ -522,7 +522,7 @@ static const struct tsens_ops ops_8939 = { struct tsens_plat_data data_8939 = { .num_sensors = 10, .ops = &ops_8939, - .hw_ids = (unsigned int []){ 0, 1, 2, 4, 5, 6, 7, 8, 9, 10 }, + .hw_ids = (unsigned int []){ 0, 1, 2, 3, 5, 6, 7, 8, 9, 10 }, .feat = &tsens_v0_1_feat, .fields = tsens_v0_1_regfields, From 110146ce8f84f8f0ef6f09d36e7c5936d5e2f0cf Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Sat, 24 Sep 2022 15:49:53 +0800 Subject: [PATCH 0289/1477] ACPI: APEI: do not add task_work to kernel 
thread to avoid memory leak [ Upstream commit 415fed694fe11395df56e05022d6e7cee1d39dd3 ] If an error is detected as a result of a user-space process accessing a corrupt memory location, the CPU may take an abort. Then the platform firmware reports it to the kernel via NMI-like notifications, e.g. NOTIFY_SEA, NOTIFY_SOFTWARE_DELEGATED, etc. For NMI-like notifications, commit 7f17b4a121d0 ("ACPI: APEI: Kick the memory_failure() queue for synchronous errors") keeps track of whether memory_failure() work was queued, and makes task_work pending to flush out the queue so that the work is processed before returning to user-space. The code uses init_mm to check whether the error occurs in user space: if (current->mm != &init_mm) The condition is always true, because _nobody_ ever has "init_mm" as a real VM any more. In addition to abort, errors can also be signaled as asynchronous exceptions, such as interrupt and SError. In such a case, the interrupted current process could be any kind of thread. When a kernel thread is interrupted, the work ghes_kick_task_work deferred to task_work will never be processed because entry_handler returns to call ret_to_kernel() instead of ret_to_user(). Consequently, the estatus_node allocated from ghes_estatus_pool in ghes_in_nmi_queue_one_entry() will not be freed. After around 200 allocations in our platform, the ghes_estatus_pool will run out of memory and ghes_in_nmi_queue_one_entry() returns ENOMEM. As a result, the event fails to be processed. sdei: event 805 on CPU 113 failed with error: -2 Finally, a lot of unhandled events may cause platform firmware to exceed some threshold and reboot. The condition should generally just do if (current->mm) as described in active_mm.rst documentation. Then if an asynchronous error is detected when a kernel thread is running, (e.g. when detected by a background scrubber), do not add task_work to it as the original patch intends to do.
Fixes: 7f17b4a121d0 ("ACPI: APEI: Kick the memory_failure() queue for synchronous errors") Signed-off-by: Shuai Xue Reviewed-by: Tony Luck Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/apei/ghes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 0c8330ed1ffd..5206fd3b7867 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -985,7 +985,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) ghes_estatus_cache_add(generic, estatus); } - if (task_work_pending && current->mm != &init_mm) { + if (task_work_pending && current->mm) { estatus_node->task_work.func = ghes_kick_task_work; estatus_node->task_work_cpu = smp_processor_id(); ret = task_work_add(current, &estatus_node->task_work, From ecbd95958c484780afa6f71b73c3dbdc647cc6ca Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 5 Sep 2022 12:59:17 +0800 Subject: [PATCH 0290/1477] f2fs: fix race condition on setting FI_NO_EXTENT flag [ Upstream commit 07725adc55c0a414c10acb5c8c86cea34b95ddef ] The following scenarios exist. process A: process B: ->f2fs_drop_extent_tree ->f2fs_update_extent_cache_range ->f2fs_update_extent_tree_range ->write_lock ->set_inode_flag ->is_inode_flag_set ->__free_extent_tree // Shouldn't // have been // cleaned up // here ->write_lock In this case, the "FI_NO_EXTENT" flag is set between f2fs_update_extent_tree_range and is_inode_flag_set by another process. It leads to clearing the whole extent tree, which should not have happened. We fix it by moving the setting of the flag inside the range protected by write_lock.
Fixes:5f281fab9b9a3 ("f2fs: disable extent_cache for fcollapse/finsert inodes") Signed-off-by: Zhang Qilong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/extent_cache.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 3ebf976a682d..bd16c78b5bf2 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -762,9 +762,8 @@ void f2fs_drop_extent_tree(struct inode *inode) if (!f2fs_may_extent_tree(inode)) return; - set_inode_flag(inode, FI_NO_EXTENT); - write_lock(&et->lock); + set_inode_flag(inode, FI_NO_EXTENT); __free_extent_tree(sbi, et); if (et->largest.len) { et->largest.len = 0; From 0b8230d44ce763ed3811e8f18a77c9b8cdc9bf3c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Nov 2020 10:57:36 +0800 Subject: [PATCH 0291/1477] f2fs: fix to avoid REQ_TIME and CP_TIME collision [ Upstream commit 493720a4854343b7c3fe100cda6a3a2c3f8d4b5d ] Lei Li reported a issue: if foreground operations are frequent, background checkpoint may be always skipped due to below check, result in losing more data after sudden power-cut. f2fs_balance_fs_bg() ... if (!is_idle(sbi, REQ_TIME) && (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) return; E.g: cp_interval = 5 second idle_interval = 2 second foreground operation interval = 1 second (append 1 byte per second into file) In such case, no matter when it calls f2fs_balance_fs_bg(), is_idle(, REQ_TIME) returns false, result in skipping background checkpoint. 
This patch changes as below to make trigger condition being more reasonable: - trigger sync_fs() if dirty_{nats,nodes} and prefree segs exceeds threshold; - skip triggering sync_fs() if there is any background inflight IO or there is foreground operation recently and meanwhile cp_rwsem is being held by someone; Reported-by: Lei Li Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: d80afefb17e0 ("f2fs: fix to account FS_CP_DATA_IO correctly") Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 19 +++++++++++++------ fs/f2fs/segment.c | 43 +++++++++++++++++++++++++------------------ 2 files changed, 38 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dbe9fcef07e3..70fec13d35b7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2426,24 +2426,31 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } -static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type) { - if (sbi->gc_mode == GC_URGENT_HIGH) - return true; - if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || get_pages(sbi, F2FS_WB_CP_DATA) || get_pages(sbi, F2FS_DIO_READ) || get_pages(sbi, F2FS_DIO_WRITE)) - return false; + return true; if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info && atomic_read(&SM_I(sbi)->dcc_info->queued_discard)) - return false; + return true; if (SM_I(sbi) && SM_I(sbi)->fcc_info && atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) + return true; + return false; +} + +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +{ + if (sbi->gc_mode == GC_URGENT_HIGH) + return true; + + if (is_inflight_io(sbi, type)) return false; if (sbi->gc_mode == GC_URGENT_LOW && diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 19224e7d2ad0..173161f1ced0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -536,31 +536,38 @@ void 
f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) else f2fs_build_free_nids(sbi, false, false); - if (!is_idle(sbi, REQ_TIME) && - (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) + if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) || + excess_prefree_segs(sbi)) + goto do_sync; + + /* there is background inflight IO or foreground operation recently */ + if (is_inflight_io(sbi, REQ_TIME) || + (!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem))) return; + /* exceed periodical checkpoint timeout threshold */ + if (f2fs_time_over(sbi, CP_TIME)) + goto do_sync; + /* checkpoint is the only way to shrink partial cached entries */ - if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) || - !f2fs_available_free_memory(sbi, INO_ENTRIES) || - excess_prefree_segs(sbi) || - excess_dirty_nats(sbi) || - excess_dirty_nodes(sbi) || - f2fs_time_over(sbi, CP_TIME)) { - if (test_opt(sbi, DATA_FLUSH) && from_bg) { - struct blk_plug plug; + if (f2fs_available_free_memory(sbi, NAT_ENTRIES) || + f2fs_available_free_memory(sbi, INO_ENTRIES)) + return; - mutex_lock(&sbi->flush_lock); +do_sync: + if (test_opt(sbi, DATA_FLUSH) && from_bg) { + struct blk_plug plug; - blk_start_plug(&plug); - f2fs_sync_dirty_inodes(sbi, FILE_INODE); - blk_finish_plug(&plug); + mutex_lock(&sbi->flush_lock); - mutex_unlock(&sbi->flush_lock); - } - f2fs_sync_fs(sbi->sb, true); - stat_inc_bg_cp_count(sbi->stat_info); + blk_start_plug(&plug); + f2fs_sync_dirty_inodes(sbi, FILE_INODE); + blk_finish_plug(&plug); + + mutex_unlock(&sbi->flush_lock); } + f2fs_sync_fs(sbi->sb, true); + stat_inc_bg_cp_count(sbi->stat_info); } static int __submit_flush_wait(struct f2fs_sb_info *sbi, From b60aa21e2f3ab0cfccefcd3f4963d184ec801eea Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 14 Sep 2022 21:28:46 +0800 Subject: [PATCH 0292/1477] f2fs: fix to account FS_CP_DATA_IO correctly [ Upstream commit d80afefb17e01aa0c46a8eebc01882e0ebd8b0f6 ] f2fs_inode_info.cp_task was introduced for FS_CP_DATA_IO 
accounting since commit b0af6d491a6b ("f2fs: add app/fs io stat"). However, cp_task usage coverage has been increased due to below commits: commit 040d2bb318d1 ("f2fs: fix to avoid deadloop if data_flush is on") commit 186857c5a14a ("f2fs: fix potential recursive call when enabling data_flush") So that, if data_flush mountoption is on, when data flush was triggered from background, the IO from data flush will be accounted as checkpoint IO type incorrectly. In order to fix this issue, this patch splits cp_task into two: a) cp_task: used for IO accounting b) wb_task: used to avoid deadlock Fixes: 040d2bb318d1 ("f2fs: fix to avoid deadloop if data_flush is on") Fixes: 186857c5a14a ("f2fs: fix potential recursive call when enabling data_flush") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 13 +++++++++---- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 4 +++- fs/f2fs/segment.c | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0653c54873b5..cd46a64ace1b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1047,7 +1047,8 @@ void f2fs_remove_dirty_inode(struct inode *inode) spin_unlock(&sbi->inode_lock[type]); } -int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) +int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type, + bool from_cp) { struct list_head *head; struct inode *inode; @@ -1082,11 +1083,15 @@ retry: if (inode) { unsigned long cur_ino = inode->i_ino; - F2FS_I(inode)->cp_task = current; + if (from_cp) + F2FS_I(inode)->cp_task = current; + F2FS_I(inode)->wb_task = current; filemap_fdatawrite(inode->i_mapping); - F2FS_I(inode)->cp_task = NULL; + F2FS_I(inode)->wb_task = NULL; + if (from_cp) + F2FS_I(inode)->cp_task = NULL; iput(inode); /* We need to give cpu to another writers. 
*/ @@ -1215,7 +1220,7 @@ retry_flush_dents: /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); - err = f2fs_sync_dirty_inodes(sbi, DIR_INODE); + err = f2fs_sync_dirty_inodes(sbi, DIR_INODE, true); if (err) return err; cond_resched(); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b2016fd3a7ca..9270330ec5ce 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2912,7 +2912,7 @@ out: } unlock_page(page); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && - !F2FS_I(inode)->cp_task && allow_balance) + !F2FS_I(inode)->wb_task && allow_balance) f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { @@ -3210,7 +3210,7 @@ static inline bool __should_serialize_io(struct inode *inode, struct writeback_control *wbc) { /* to avoid deadlock in path of data flush */ - if (F2FS_I(inode)->cp_task) + if (F2FS_I(inode)->wb_task) return false; if (!S_ISREG(inode->i_mode)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 70fec13d35b7..c03fdda1bddf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -701,6 +701,7 @@ struct f2fs_inode_info { unsigned int clevel; /* maximum level of given file name */ struct task_struct *task; /* lookup and create consistency */ struct task_struct *cp_task; /* separate cp/wb IO stats*/ + struct task_struct *wb_task; /* indicate inode is in context of writeback */ nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ spinlock_t i_size_lock; /* protect last_disk_size */ @@ -3400,7 +3401,8 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi); int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi); void f2fs_update_dirty_page(struct inode *inode, struct page *page); void f2fs_remove_dirty_inode(struct inode *inode); -int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); +int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type, + bool from_cp); void f2fs_wait_on_all_pages(struct 
f2fs_sb_info *sbi, int type); int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 173161f1ced0..3123fd49c8ce 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -561,7 +561,7 @@ do_sync: mutex_lock(&sbi->flush_lock); blk_start_plug(&plug); - f2fs_sync_dirty_inodes(sbi, FILE_INODE); + f2fs_sync_dirty_inodes(sbi, FILE_INODE, false); blk_finish_plug(&plug); mutex_unlock(&sbi->flush_lock); From 278d8ba2b288e8f7928f2333f8611a5f0fdbdd68 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Tue, 20 Sep 2022 09:15:18 -0400 Subject: [PATCH 0293/1477] selftest: tpm2: Add Client.__del__() to close /dev/tpm* handle [ Upstream commit 2d869f0b458547386fbcd8cf3004b271b7347b7f ] The following output can bee seen when the test is executed: test_flush_context (tpm2_tests.SpaceTest) ... \ /usr/lib64/python3.6/unittest/case.py:605: ResourceWarning: \ unclosed file <_io.FileIO name='/dev/tpmrm0' mode='rb+' closefd=True> An instance of Client does not implicitly close /dev/tpm* handle, once it gets destroyed. Close the file handle in the class destructor Client.__del__(). 
Fixes: 6ea3dfe1e0732 ("selftests: add TPM 2.0 tests") Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Cc: Jarkko Sakkinen Signed-off-by: Stefan Berger Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin --- tools/testing/selftests/tpm2/tpm2.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index f34486cd7342..3e67fdb518ec 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -370,6 +370,10 @@ class Client: fcntl.fcntl(self.tpm, fcntl.F_SETFL, flags) self.tpm_poll = select.poll() + def __del__(self): + if self.tpm: + self.tpm.close() + def close(self): self.tpm.close() From 36d4ffbedff718a0701234069ab3b38d5dd32799 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 22 Jun 2022 13:47:11 +0200 Subject: [PATCH 0294/1477] rcu: Back off upon fill_page_cache_func() allocation failure [ Upstream commit 093590c16b447f53e66771c8579ae66c96f6ef61 ] The fill_page_cache_func() function allocates couple of pages to store kvfree_rcu_bulk_data structures. This is a lightweight (GFP_NORETRY) allocation which can fail under memory pressure. The function will, however keep retrying even when the previous attempt has failed. This retrying is in theory correct, but in practice the allocation is invoked from workqueue context, which means that if the memory reclaim gets stuck, these retries can hog the worker for quite some time. Although the workqueues subsystem automatically adjusts concurrency, such adjustment is not guaranteed to happen until the worker context sleeps. And the fill_page_cache_func() function's retry loop is not guaranteed to sleep (see the should_reclaim_retry() function). And we have seen this function cause workqueue lockups: kernel: BUG: workqueue lockup - pool cpus=93 node=1 flags=0x1 nice=0 stuck for 32s! [...] 
kernel: pool 74: cpus=37 node=0 flags=0x1 nice=0 hung=32s workers=2 manager: 2146 kernel: pwq 498: cpus=249 node=1 flags=0x1 nice=0 active=4/256 refcnt=5 kernel: in-flight: 1917:fill_page_cache_func kernel: pending: dbs_work_handler, free_work, kfree_rcu_monitor Originally, we thought that the root cause of this lockup was several retries with direct reclaim, but this is not yet confirmed. Furthermore, we have seen similar lockups without any heavy memory pressure. This suggests that there are other factors contributing to these lockups. However, it is not really clear that endless retries are desireable. So let's make the fill_page_cache_func() function back off after allocation failure. Cc: Uladzislau Rezki (Sony) Cc: "Paul E. McKenney" Cc: Frederic Weisbecker Cc: Neeraj Upadhyay Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Lai Jiangshan Cc: Joel Fernandes Signed-off-by: Michal Hocko Reviewed-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney Signed-off-by: Sasha Levin --- kernel/rcu/tree.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b41009a283ca..b10d6bcea77d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3393,15 +3393,16 @@ static void fill_page_cache_func(struct work_struct *work) bnode = (struct kvfree_rcu_bulk_data *) __get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); - if (bnode) { - raw_spin_lock_irqsave(&krcp->lock, flags); - pushed = put_cached_bnode(krcp, bnode); - raw_spin_unlock_irqrestore(&krcp->lock, flags); + if (!bnode) + break; - if (!pushed) { - free_page((unsigned long) bnode); - break; - } + raw_spin_lock_irqsave(&krcp->lock, flags); + pushed = put_cached_bnode(krcp, bnode); + raw_spin_unlock_irqrestore(&krcp->lock, flags); + + if (!pushed) { + free_page((unsigned long) bnode); + break; } } From 0dd025483f150f329842c36ef8c3a24fb60f3bbb Mon Sep 17 00:00:00 2001 From: Zqiang Date: Tue, 12 Jul 2022 
16:26:05 +0800 Subject: [PATCH 0295/1477] rcu-tasks: Convert RCU_LOCKDEP_WARN() to WARN_ONCE() [ Upstream commit fcd53c8a4dfa38bafb89efdd0b0f718f3a03f884 ] Kernels built with CONFIG_PROVE_RCU=y and CONFIG_DEBUG_LOCK_ALLOC=y attempt to emit a warning when the synchronize_rcu_tasks_generic() function is called during early boot while the rcu_scheduler_active variable is RCU_SCHEDULER_INACTIVE. However the warnings is not actually be printed because the debug_lockdep_rcu_enabled() returns false, exactly because the rcu_scheduler_active variable is still equal to RCU_SCHEDULER_INACTIVE. This commit therefore replaces RCU_LOCKDEP_WARN() with WARN_ONCE() to force these warnings to actually be printed. Signed-off-by: Zqiang Signed-off-by: Paul E. McKenney Signed-off-by: Sasha Levin --- kernel/rcu/tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 14af29fe1377..8b51e6a5b386 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -171,7 +171,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp) { /* Complain if the scheduler has not started. */ - RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, + WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, "synchronize_rcu_tasks called too soon"); /* Wait for the grace period. */ From 6c61a37ea70e18998158fa57554eb9e571e3f3aa Mon Sep 17 00:00:00 2001 From: Arvid Norlander Date: Wed, 24 Aug 2022 20:49:50 +0200 Subject: [PATCH 0296/1477] ACPI: video: Add Toshiba Satellite/Portege Z830 quirk [ Upstream commit 574160b8548deff8b80b174f03201e94ab8431e2 ] Toshiba Satellite Z830 needs the quirk video_disable_backlight_sysfs_if for proper backlight control after suspend/resume cycles. 
Toshiba Portege Z830 is simply the same laptop rebranded for certain markets (I looked through the manual to other language sections to confirm this) and thus also needs this quirk. Thanks to Hans de Goede for suggesting this fix. Link: https://www.spinics.net/lists/platform-driver-x86/msg34394.html Suggested-by: Hans de Goede Signed-off-by: Arvid Norlander Reviewed-by: Hans de Goede Tested-by: Arvid Norlander Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpi_video.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index eb04b2f828ee..cf6c9ffe04a2 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -498,6 +498,22 @@ static const struct dmi_system_id video_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE R830"), }, }, + { + .callback = video_disable_backlight_sysfs_if, + .ident = "Toshiba Satellite Z830", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE Z830"), + }, + }, + { + .callback = video_disable_backlight_sysfs_if, + .ident = "Toshiba Portege Z830", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE Z830"), + }, + }, /* * Some machine's _DOD IDs don't have bit 31(Device ID Scheme) set * but the IDs actually follow the Device ID Scheme. From b434edb0e9d159741f909eeb20843ae008e30e62 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Sep 2022 16:05:56 -0700 Subject: [PATCH 0297/1477] MIPS: BCM47XX: Cast memcmp() of function to (void *) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0dedcf6e3301836eb70cfa649052e7ce4fcd13ba ] Clang is especially sensitive about argument type matching when using __overloaded functions (like memcmp(), etc). Help it see that function pointers are just "void *". 
Avoids this error: arch/mips/bcm47xx/prom.c:89:8: error: no matching function for call to 'memcmp' if (!memcmp(prom_init, prom_init + mem, 32)) ^~~~~~ include/linux/string.h:156:12: note: candidate function not viable: no known conversion from 'void (void)' to 'const void *' for 1st argument extern int memcmp(const void *,const void *,__kernel_size_t); Cc: Hauke Mehrtens Cc: "Rafał Miłecki" Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: llvm@lists.linux.dev Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202209080652.sz2d68e5-lkp@intel.com Signed-off-by: Kees Cook Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/bcm47xx/prom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/bcm47xx/prom.c b/arch/mips/bcm47xx/prom.c index 3e2a8166377f..22509b5fab74 100644 --- a/arch/mips/bcm47xx/prom.c +++ b/arch/mips/bcm47xx/prom.c @@ -86,7 +86,7 @@ static __init void prom_init_mem(void) pr_debug("Assume 128MB RAM\n"); break; } - if (!memcmp(prom_init, prom_init + mem, 32)) + if (!memcmp((void *)prom_init, (void *)prom_init + mem, 32)) break; } lowmem = mem; @@ -163,7 +163,7 @@ void __init bcm47xx_prom_highmem_init(void) off = EXTVBASE + __pa(off); for (extmem = 128 << 20; extmem < 512 << 20; extmem <<= 1) { - if (!memcmp(prom_init, (void *)(off + extmem), 16)) + if (!memcmp((void *)prom_init, (void *)(off + extmem), 16)) break; } extmem -= lowmem; From 708b9abe1b4a2f050a483db4b7edfc446b13df1f Mon Sep 17 00:00:00 2001 From: Chao Qin Date: Tue, 20 Sep 2022 14:08:26 +0800 Subject: [PATCH 0298/1477] powercap: intel_rapl: fix UBSAN shift-out-of-bounds issue [ Upstream commit 2d93540014387d1c73b9ccc4d7895320df66d01b ] When value < time_unit, the parameter of ilog2() will be zero and the return value is -1. 
u64(-1) is too large for shift exponent and then will trigger shift-out-of-bounds: shift exponent 18446744073709551615 is too large for 32-bit type 'int' Call Trace: rapl_compute_time_window_core rapl_write_data_raw set_time_window store_constraint_time_window_us Signed-off-by: Chao Qin Acked-by: Zhang Rui Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/powercap/intel_rapl_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index a13a07f475d2..285420c1eb7c 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -938,6 +938,9 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value, y = value & 0x1f; value = (1 << y) * (4 + f) * rp->time_unit / 4; } else { + if (value < rp->time_unit) + return 0; + do_div(value, rp->time_unit); y = ilog2(value); f = div64_u64(4 * (value - (1 << y)), 1 << y); From 513943bf879d45005213e6f5cfb7d9e9943f589f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 20 Sep 2022 04:06:57 -0700 Subject: [PATCH 0299/1477] thermal: intel_powerclamp: Use get_cpu() instead of smp_processor_id() to avoid crash [ Upstream commit 68b99e94a4a2db6ba9b31fe0485e057b9354a640 ] When CPU 0 is offline and intel_powerclamp is used to inject idle, it generates kernel BUG: BUG: using smp_processor_id() in preemptible [00000000] code: bash/15687 caller is debug_smp_processor_id+0x17/0x20 CPU: 4 PID: 15687 Comm: bash Not tainted 5.19.0-rc7+ #57 Call Trace: dump_stack_lvl+0x49/0x63 dump_stack+0x10/0x16 check_preemption_disabled+0xdd/0xe0 debug_smp_processor_id+0x17/0x20 powerclamp_set_cur_state+0x7f/0xf9 [intel_powerclamp] ... ... Here CPU 0 is the control CPU by default and changed to the current CPU, if CPU 0 offlined. This check has to be performed under cpus_read_lock(), hence the above warning. Use get_cpu() instead of smp_processor_id() to avoid this BUG. 
Suggested-by: Chen Yu Signed-off-by: Srinivas Pandruvada [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/thermal/intel/intel_powerclamp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index b0eb5ece9243..14381f7587ff 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -532,8 +532,10 @@ static int start_power_clamp(void) /* prefer BSP */ control_cpu = 0; - if (!cpu_online(control_cpu)) - control_cpu = smp_processor_id(); + if (!cpu_online(control_cpu)) { + control_cpu = get_cpu(); + put_cpu(); + } clamping = true; schedule_delayed_work(&poll_pkg_cstate_work, 0); From 1f730d4ae6f9ea8aa3b5e0c6c338fe8903b4647d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 19 Sep 2022 19:45:14 -0700 Subject: [PATCH 0300/1477] x86/entry: Work around Clang __bdos() bug [ Upstream commit 3e1730842f142add55dc658929221521a9ea62b6 ] Clang produces a false positive when building with CONFIG_FORTIFY_SOURCE=y and CONFIG_UBSAN_BOUNDS=y when operating on an array with a dynamic offset. Work around this by using a direct assignment of an empty instance. Avoids this warning: ../include/linux/fortify-string.h:309:4: warning: call to __write_overflow_field declared with 'warn ing' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wat tribute-warning] __write_overflow_field(p_size_field, size); ^ which was isolated to the memset() call in xen_load_idt(). Note that this looks very much like another bug that was worked around: https://github.com/ClangBuiltLinux/linux/issues/1592 Cc: Juergen Gross Cc: Boris Ostrovsky Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: x86@kernel.org Cc: "H. 
Peter Anvin" Cc: xen-devel@lists.xenproject.org Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/lkml/41527d69-e8ab-3f86-ff37-6b298c01d5bc@oracle.com Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- arch/x86/xen/enlighten_pv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 804c65d2b95f..815030b7f6fa 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -768,6 +768,7 @@ static void xen_load_idt(const struct desc_ptr *desc) { static DEFINE_SPINLOCK(lock); static struct trap_info traps[257]; + static const struct trap_info zero = { }; unsigned out; trace_xen_cpu_load_idt(desc); @@ -777,7 +778,7 @@ static void xen_load_idt(const struct desc_ptr *desc) memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); out = xen_convert_trap_info(desc, traps, false); - memset(&traps[out], 0, sizeof(traps[0])); + traps[out] = zero; xen_mc_flush(); if (HYPERVISOR_set_trap_table(traps)) From 3de402a5248a9de841d47b5a11b5dbe5c30da682 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 13 Sep 2022 14:01:50 -0400 Subject: [PATCH 0301/1477] NFSD: Return nfserr_serverfault if splice_ok but buf->pages have data [ Upstream commit 06981d560606ac48d61e5f4fff6738b925c93173 ] This was discussed with Chuck as part of this patch set. Returning nfserr_resource was decided to not be the best error message here, and he suggested changing to nfserr_serverfault instead. 
Signed-off-by: Anna Schumaker Link: https://lore.kernel.org/linux-nfs/20220907195259.926736-1-anna@kernel.org/T/#t Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 46f825cf53f4..cc605ee0b2fa 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3871,7 +3871,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { WARN_ON_ONCE(1); - return nfserr_resource; + return nfserr_serverfault; } xdr_commit_encode(xdr); From bbacfcde5fff25ac22597e8373a065c647da6738 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Mon, 26 Sep 2022 10:59:16 -0700 Subject: [PATCH 0302/1477] NFSD: fix use-after-free on source server when doing inter-server copy [ Upstream commit 019805fea91599b22dfa62ffb29c022f35abeb06 ] Use-after-free occurred when the laundromat tried to free expired cpntf_state entry on the s2s_cp_stateids list after inter-server copy completed. The sc_cp_list that the expired copy state was inserted on was already freed. When COPY completes, the Linux client normally sends LOCKU(lock_state x), FREE_STATEID(lock_state x) and CLOSE(open_state y) to the source server. The nfs4_put_stid call from nfsd4_free_stateid cleans up the copy state from the s2s_cp_stateids list before freeing the lock state's stid. However, sometimes the CLOSE was sent before the FREE_STATEID request. When this happens, the nfsd4_close_open_stateid call from nfsd4_close frees all lock states on its st_locks list without cleaning up the copy state on the sc_cp_list list. When the time the FREE_STATEID arrives the server returns BAD_STATEID since the lock state was freed. This causes the use-after-free error to occur when the laundromat tries to free the expired cpntf_state. 
This patch adds a call to nfs4_free_cpntf_statelist in nfsd4_close_open_stateid to clean up the copy state before calling free_ol_stateid_reaplist to free the lock state's stid on the reaplist. Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4state.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f1b503bec222..665d0eaeb8db 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -843,6 +843,7 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) static void nfs4_free_deleg(struct nfs4_stid *stid) { + WARN_ON(!list_empty(&stid->sc_cp_list)); kmem_cache_free(deleg_slab, stid); atomic_long_dec(&num_delegations); } @@ -1358,6 +1359,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) release_all_access(stp); if (stp->st_stateowner) nfs4_put_stateowner(stp->st_stateowner); + WARN_ON(!list_empty(&stid->sc_cp_list)); kmem_cache_free(stateid_slab, stid); } @@ -6207,6 +6209,7 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) struct nfs4_client *clp = s->st_stid.sc_client; bool unhashed; LIST_HEAD(reaplist); + struct nfs4_ol_stateid *stp; spin_lock(&clp->cl_lock); unhashed = unhash_open_stateid(s, &reaplist); @@ -6215,6 +6218,8 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) if (unhashed) put_ol_stateid_locked(s, &reaplist); spin_unlock(&clp->cl_lock); + list_for_each_entry(stp, &reaplist, st_locks) + nfs4_free_cpntf_statelist(clp->net, &stp->st_stid); free_ol_stateid_reaplist(&reaplist); } else { spin_unlock(&clp->cl_lock); From 50e45034c5802cedbf5b707364ea76ace29ad984 Mon Sep 17 00:00:00 2001 From: Wright Feng Date: Fri, 22 Jul 2022 13:56:28 +0200 Subject: [PATCH 0303/1477] wifi: brcmfmac: fix invalid address access when enabling SCAN log level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aa666b68e73fc06d83c070d96180b9010cf5a960 ] The 
variable i is changed when setting random MAC address and causes invalid address access when printing the value of pi->reqs[i]->reqid. We replace reqs index with ri to fix the issue. [ 136.726473] Unable to handle kernel access to user memory outside uaccess routines at virtual address 0000000000000000 [ 136.737365] Mem abort info: [ 136.740172] ESR = 0x96000004 [ 136.743359] Exception class = DABT (current EL), IL = 32 bits [ 136.749294] SET = 0, FnV = 0 [ 136.752481] EA = 0, S1PTW = 0 [ 136.755635] Data abort info: [ 136.758514] ISV = 0, ISS = 0x00000004 [ 136.762487] CM = 0, WnR = 0 [ 136.765522] user pgtable: 4k pages, 48-bit VAs, pgdp = 000000005c4e2577 [ 136.772265] [0000000000000000] pgd=0000000000000000 [ 136.777160] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 136.782732] Modules linked in: brcmfmac(O) brcmutil(O) cfg80211(O) compat(O) [ 136.789788] Process wificond (pid: 3175, stack limit = 0x00000000053048fb) [ 136.796664] CPU: 3 PID: 3175 Comm: wificond Tainted: G O 4.19.42-00001-g531a5f5 #1 [ 136.805532] Hardware name: Freescale i.MX8MQ EVK (DT) [ 136.810584] pstate: 60400005 (nZCv daif +PAN -UAO) [ 136.815429] pc : brcmf_pno_config_sched_scans+0x6cc/0xa80 [brcmfmac] [ 136.821811] lr : brcmf_pno_config_sched_scans+0x67c/0xa80 [brcmfmac] [ 136.828162] sp : ffff00000e9a3880 [ 136.831475] x29: ffff00000e9a3890 x28: ffff800020543400 [ 136.836786] x27: ffff8000b1008880 x26: ffff0000012bf6a0 [ 136.842098] x25: ffff80002054345c x24: ffff800088d22400 [ 136.847409] x23: ffff0000012bf638 x22: ffff0000012bf6d8 [ 136.852721] x21: ffff8000aced8fc0 x20: ffff8000ac164400 [ 136.858032] x19: ffff00000e9a3946 x18: 0000000000000000 [ 136.863343] x17: 0000000000000000 x16: 0000000000000000 [ 136.868655] x15: ffff0000093f3b37 x14: 0000000000000050 [ 136.873966] x13: 0000000000003135 x12: 0000000000000000 [ 136.879277] x11: 0000000000000000 x10: ffff000009a61888 [ 136.884589] x9 : 000000000000000f x8 : 0000000000000008 [ 136.889900] x7 : 303a32303d726464 x6 : 
ffff00000a1f957d [ 136.895211] x5 : 0000000000000000 x4 : ffff00000e9a3942 [ 136.900523] x3 : 0000000000000000 x2 : ffff0000012cead8 [ 136.905834] x1 : ffff0000012bf6d8 x0 : 0000000000000000 [ 136.911146] Call trace: [ 136.913623] brcmf_pno_config_sched_scans+0x6cc/0xa80 [brcmfmac] [ 136.919658] brcmf_pno_start_sched_scan+0xa4/0x118 [brcmfmac] [ 136.925430] brcmf_cfg80211_sched_scan_start+0x80/0xe0 [brcmfmac] [ 136.931636] nl80211_start_sched_scan+0x140/0x308 [cfg80211] [ 136.937298] genl_rcv_msg+0x358/0x3f4 [ 136.940960] netlink_rcv_skb+0xb4/0x118 [ 136.944795] genl_rcv+0x34/0x48 [ 136.947935] netlink_unicast+0x264/0x300 [ 136.951856] netlink_sendmsg+0x2e4/0x33c [ 136.955781] __sys_sendto+0x120/0x19c Signed-off-by: Wright Feng Signed-off-by: Chi-hsien Lin Signed-off-by: Ahmad Fatoum Signed-off-by: Alvin Šipraga Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220722115632.620681-4-alvin@pqrs.dk Signed-off-by: Sasha Levin --- .../net/wireless/broadcom/brcm80211/brcmfmac/pno.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c index fabfbb0b40b0..d0a7465be586 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c @@ -158,12 +158,12 @@ static int brcmf_pno_set_random(struct brcmf_if *ifp, struct brcmf_pno_info *pi) struct brcmf_pno_macaddr_le pfn_mac; u8 *mac_addr = NULL; u8 *mac_mask = NULL; - int err, i; + int err, i, ri; - for (i = 0; i < pi->n_reqs; i++) - if (pi->reqs[i]->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { - mac_addr = pi->reqs[i]->mac_addr; - mac_mask = pi->reqs[i]->mac_addr_mask; + for (ri = 0; ri < pi->n_reqs; ri++) + if (pi->reqs[ri]->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { + mac_addr = pi->reqs[ri]->mac_addr; + mac_mask = pi->reqs[ri]->mac_addr_mask; break; } @@ -185,7 +185,7 @@ static int brcmf_pno_set_random(struct brcmf_if 
*ifp, struct brcmf_pno_info *pi) pfn_mac.mac[0] |= 0x02; brcmf_dbg(SCAN, "enabling random mac: reqid=%llu mac=%pM\n", - pi->reqs[i]->reqid, pfn_mac.mac); + pi->reqs[ri]->reqid, pfn_mac.mac); err = brcmf_fil_iovar_data_set(ifp, "pfn_macaddr", &pfn_mac, sizeof(pfn_mac)); if (err) From e3c9b94734531a08c9bf51057ca3a9022cc90f91 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 15 Aug 2022 17:22:05 +0100 Subject: [PATCH 0304/1477] bpftool: Clear errno after libcap's checks [ Upstream commit cea558855c39b7f1f02ff50dcf701ca6596bc964 ] When bpftool is linked against libcap, the library runs a "constructor" function to compute the number of capabilities of the running kernel [0], at the beginning of the execution of the program. As part of this, it performs multiple calls to prctl(). Some of these may fail, and set errno to a non-zero value: # strace -e prctl ./bpftool version prctl(PR_CAPBSET_READ, CAP_MAC_OVERRIDE) = 1 prctl(PR_CAPBSET_READ, 0x30 /* CAP_??? */) = -1 EINVAL (Invalid argument) prctl(PR_CAPBSET_READ, CAP_CHECKPOINT_RESTORE) = 1 prctl(PR_CAPBSET_READ, 0x2c /* CAP_??? */) = -1 EINVAL (Invalid argument) prctl(PR_CAPBSET_READ, 0x2a /* CAP_??? */) = -1 EINVAL (Invalid argument) prctl(PR_CAPBSET_READ, 0x29 /* CAP_??? */) = -1 EINVAL (Invalid argument) ** fprintf added at the top of main(): we have errno == 1 ./bpftool v7.0.0 using libbpf v1.0 features: libbfd, libbpf_strict, skeletons +++ exited with 0 +++ This has been addressed in libcap 2.63 [1], but until this version is available everywhere, we can fix it on bpftool side. Let's clean errno at the beginning of the main() function, to make sure that these checks do not interfere with the batch mode, where we error out if errno is set after a bpftool command. 
[0] https://git.kernel.org/pub/scm/libs/libcap/libcap.git/tree/libcap/cap_alloc.c?h=libcap-2.65#n20 [1] https://git.kernel.org/pub/scm/libs/libcap/libcap.git/commit/?id=f25a1b7e69f7b33e6afb58b3e38f3450b7d2d9a0 Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220815162205.45043-1-quentin@isovalent.com Signed-off-by: Sasha Levin --- tools/bpf/bpftool/main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 1854d6b97860..4fd4e3462ebc 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -398,6 +398,16 @@ int main(int argc, char **argv) setlinebuf(stdout); +#ifdef USE_LIBCAP + /* Libcap < 2.63 hooks before main() to compute the number of + * capabilities of the running kernel, and doing so it calls prctl() + * which may fail and set errno to non-zero. + * Let's reset errno to make sure this does not interfere with the + * batch mode. + */ + errno = 0; +#endif + last_do_help = do_help; pretty_output = false; json_output = false; From 4398e8a7fd6abf9cdfc69ee2a75434bf47c2a210 Mon Sep 17 00:00:00 2001 From: Mike Pattrick Date: Wed, 17 Aug 2022 11:06:34 -0400 Subject: [PATCH 0305/1477] openvswitch: Fix double reporting of drops in dropwatch [ Upstream commit 1100248a5c5ccd57059eb8d02ec077e839a23826 ] Frames sent to userspace can be reported as dropped in ovs_dp_process_packet, however, if they are dropped in the netlink code then netlink_attachskb will report the same frame as dropped. This patch checks for error codes which indicate that the frame has already been freed. Signed-off-by: Mike Pattrick Link: https://bugzilla.redhat.com/show_bug.cgi?id=2109946 Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/openvswitch/datapath.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9d6ef6cb9b26..4d2d91d6f990 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -241,10 +241,17 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) upcall.portid = ovs_vport_find_upcall_portid(p, skb); upcall.mru = OVS_CB(skb)->mru; error = ovs_dp_upcall(dp, skb, key, &upcall, 0); - if (unlikely(error)) - kfree_skb(skb); - else + switch (error) { + case 0: + case -EAGAIN: + case -ERESTARTSYS: + case -EINTR: consume_skb(skb); + break; + default: + kfree_skb(skb); + break; + } stats_counter = &stats->n_missed; goto out; } From 129ca0db956e1e2483f55d2217756f9258f52737 Mon Sep 17 00:00:00 2001 From: Mike Pattrick Date: Wed, 17 Aug 2022 11:06:35 -0400 Subject: [PATCH 0306/1477] openvswitch: Fix overreporting of drops in dropwatch [ Upstream commit c21ab2afa2c64896a7f0e3cbc6845ec63dcfad2e ] Currently queue_userspace_packet will call kfree_skb for all frames, whether or not an error occurred. This can result in a single dropped frame being reported as multiple drops in dropwatch. This function's caller may also call kfree_skb in case of an error. This patch will consume the skbs instead and allow callers to use kfree_skb. Signed-off-by: Mike Pattrick Link: https://bugzilla.redhat.com/show_bug.cgi?id=2109957 Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- net/openvswitch/datapath.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4d2d91d6f990..6b5c0abf7f1b 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -544,8 +544,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, out: if (err) skb_tx_error(skb); - kfree_skb(user_skb); - kfree_skb(nskb); + consume_skb(user_skb); + consume_skb(nskb); + return err; } From 0958e487e81bdb5ca658a3b6e72c8252d6e163f9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Aug 2022 21:15:28 +0000 Subject: [PATCH 0307/1477] tcp: annotate data-race around tcp_md5sig_pool_populated [ Upstream commit aacd467c0a576e5e44d2de4205855dc0fe43f6fb ] tcp_md5sig_pool_populated can be read while another thread changes its value. The race has no consequence because allocations are protected with tcp_md5sig_mutex. This patch adds READ_ONCE() and WRITE_ONCE() to document the race and silence KCSAN. Reported-by: Abhishek Shah Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 24328ad00278..b0aa7cc69d51 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4043,12 +4043,16 @@ static void __tcp_alloc_md5sig_pool(void) * to memory. See smp_rmb() in tcp_get_md5sig_pool() */ smp_wmb(); - tcp_md5sig_pool_populated = true; + /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool() + * and tcp_get_md5sig_pool(). 
+ */ + WRITE_ONCE(tcp_md5sig_pool_populated, true); } bool tcp_alloc_md5sig_pool(void) { - if (unlikely(!tcp_md5sig_pool_populated)) { + /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ + if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) { mutex_lock(&tcp_md5sig_mutex); if (!tcp_md5sig_pool_populated) { @@ -4059,7 +4063,8 @@ bool tcp_alloc_md5sig_pool(void) mutex_unlock(&tcp_md5sig_mutex); } - return tcp_md5sig_pool_populated; + /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ + return READ_ONCE(tcp_md5sig_pool_populated); } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -4075,7 +4080,8 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { local_bh_disable(); - if (tcp_md5sig_pool_populated) { + /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ + if (READ_ONCE(tcp_md5sig_pool_populated)) { /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ smp_rmb(); return this_cpu_ptr(&tcp_md5sig_pool); From 9661724f6206bd606ecf13acada676a9975d230b Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 16 Aug 2022 23:46:13 +0900 Subject: [PATCH 0308/1477] wifi: ath9k: avoid uninit memory read in ath9k_htc_rx_msg() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b383e8abed41cc6ff1a3b34de75df9397fa4878c ] syzbot is reporting uninit value at ath9k_htc_rx_msg() [1], for ioctl(USB_RAW_IOCTL_EP_WRITE) can call ath9k_hif_usb_rx_stream() with pkt_len = 0 but ath9k_hif_usb_rx_stream() uses __dev_alloc_skb(pkt_len + 32, GFP_ATOMIC) based on an assumption that pkt_len is valid. As a result, ath9k_hif_usb_rx_stream() allocates skb with uninitialized memory and ath9k_htc_rx_msg() is reading from uninitialized memory. Since bytes accessed by ath9k_htc_rx_msg() is not known until ath9k_htc_rx_msg() is called, it would be difficult to check minimal valid pkt_len at "if (pkt_len > 2 * MAX_RX_BUF_SIZE) {" line in ath9k_hif_usb_rx_stream(). We have two choices. 
One is to workaround by adding __GFP_ZERO so that ath9k_htc_rx_msg() sees 0 if pkt_len is invalid. The other is to let ath9k_htc_rx_msg() validate pkt_len before accessing. This patch chose the latter. Note that I'm not sure threshold condition is correct, for I can't find details on possible packet length used by this protocol. Link: https://syzkaller.appspot.com/bug?extid=2ca247c2d60c7023de7f [1] Reported-by: syzbot Signed-off-by: Tetsuo Handa Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/7acfa1be-4b5c-b2ce-de43-95b0593fb3e5@I-love.SAKURA.ne.jp Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/htc_hst.c | 43 +++++++++++++++--------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_hst.c b/drivers/net/wireless/ath/ath9k/htc_hst.c index 994ec48b2f66..ca05b07a45e6 100644 --- a/drivers/net/wireless/ath/ath9k/htc_hst.c +++ b/drivers/net/wireless/ath/ath9k/htc_hst.c @@ -364,33 +364,27 @@ ret: } static void ath9k_htc_fw_panic_report(struct htc_target *htc_handle, - struct sk_buff *skb) + struct sk_buff *skb, u32 len) { uint32_t *pattern = (uint32_t *)skb->data; - switch (*pattern) { - case 0x33221199: - { + if (*pattern == 0x33221199 && len >= sizeof(struct htc_panic_bad_vaddr)) { struct htc_panic_bad_vaddr *htc_panic; htc_panic = (struct htc_panic_bad_vaddr *) skb->data; dev_err(htc_handle->dev, "ath: firmware panic! " "exccause: 0x%08x; pc: 0x%08x; badvaddr: 0x%08x.\n", htc_panic->exccause, htc_panic->pc, htc_panic->badvaddr); - break; - } - case 0x33221299: - { + return; + } + if (*pattern == 0x33221299) { struct htc_panic_bad_epid *htc_panic; htc_panic = (struct htc_panic_bad_epid *) skb->data; dev_err(htc_handle->dev, "ath: firmware panic! 
" "bad epid: 0x%08x\n", htc_panic->epid); - break; - } - default: - dev_err(htc_handle->dev, "ath: unknown panic pattern!\n"); - break; + return; } + dev_err(htc_handle->dev, "ath: unknown panic pattern!\n"); } /* @@ -411,16 +405,26 @@ void ath9k_htc_rx_msg(struct htc_target *htc_handle, if (!htc_handle || !skb) return; + /* A valid message requires len >= 8. + * + * sizeof(struct htc_frame_hdr) == 8 + * sizeof(struct htc_ready_msg) == 8 + * sizeof(struct htc_panic_bad_vaddr) == 16 + * sizeof(struct htc_panic_bad_epid) == 8 + */ + if (unlikely(len < sizeof(struct htc_frame_hdr))) + goto invalid; htc_hdr = (struct htc_frame_hdr *) skb->data; epid = htc_hdr->endpoint_id; if (epid == 0x99) { - ath9k_htc_fw_panic_report(htc_handle, skb); + ath9k_htc_fw_panic_report(htc_handle, skb, len); kfree_skb(skb); return; } if (epid < 0 || epid >= ENDPOINT_MAX) { +invalid: if (pipe_id != USB_REG_IN_PIPE) dev_kfree_skb_any(skb); else @@ -432,21 +436,30 @@ void ath9k_htc_rx_msg(struct htc_target *htc_handle, /* Handle trailer */ if (htc_hdr->flags & HTC_FLAGS_RECV_TRAILER) { - if (be32_to_cpu(*(__be32 *) skb->data) == 0x00C60000) + if (be32_to_cpu(*(__be32 *) skb->data) == 0x00C60000) { /* Move past the Watchdog pattern */ htc_hdr = (struct htc_frame_hdr *)(skb->data + 4); + len -= 4; + } } /* Get the message ID */ + if (unlikely(len < sizeof(struct htc_frame_hdr) + sizeof(__be16))) + goto invalid; msg_id = (__be16 *) ((void *) htc_hdr + sizeof(struct htc_frame_hdr)); /* Now process HTC messages */ switch (be16_to_cpu(*msg_id)) { case HTC_MSG_READY_ID: + if (unlikely(len < sizeof(struct htc_ready_msg))) + goto invalid; htc_process_target_rdy(htc_handle, htc_hdr); break; case HTC_MSG_CONNECT_SERVICE_RESPONSE_ID: + if (unlikely(len < sizeof(struct htc_frame_hdr) + + sizeof(struct htc_conn_svc_rspmsg))) + goto invalid; htc_process_conn_rsp(htc_handle, htc_hdr); break; default: From be81c44242b20fc3bdcc73480ef8aaee56f5d0b6 Mon Sep 17 00:00:00 2001 From: Khalid Masum Date: Thu, 1 Sep 
2022 13:12:10 +0600 Subject: [PATCH 0309/1477] xfrm: Update ipcomp_scratches with NULL when freed [ Upstream commit 8a04d2fc700f717104bfb95b0f6694e448a4537f ] Currently if ipcomp_alloc_scratches() fails to allocate memory ipcomp_scratches holds obsolete address. So when we try to free the percpu scratches using ipcomp_free_scratches() it tries to vfree non existent vm area. Described below: static void * __percpu *ipcomp_alloc_scratches(void) { ... scratches = alloc_percpu(void *); if (!scratches) return NULL; ipcomp_scratches does not know about this allocation failure. Therefore holding the old obsolete address. ... } So when we free, static void ipcomp_free_scratches(void) { ... scratches = ipcomp_scratches; Assigning obsolete address from ipcomp_scratches if (!scratches) return; for_each_possible_cpu(i) vfree(*per_cpu_ptr(scratches, i)); Trying to free non existent page, causing warning: trying to vfree nonexistent vm area. ... } Fix this breakage by updating ipcomp_scratches with NULL when scratches is freed. Suggested-by: Herbert Xu Reported-by: syzbot+5ec9bb042ddfe9644773@syzkaller.appspotmail.com Tested-by: syzbot+5ec9bb042ddfe9644773@syzkaller.appspotmail.com Signed-off-by: Khalid Masum Acked-by: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_ipcomp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 0814320472f1..24ac6805275e 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -212,6 +212,7 @@ static void ipcomp_free_scratches(void) vfree(*per_cpu_ptr(scratches, i)); free_percpu(scratches); + ipcomp_scratches = NULL; } static void * __percpu *ipcomp_alloc_scratches(void) From e01d96494a9de0f48b1167f0494f6d929fa773ed Mon Sep 17 00:00:00 2001 From: Alexander Coffin Date: Mon, 8 Aug 2022 10:49:26 -0700 Subject: [PATCH 0310/1477] wifi: brcmfmac: fix use-after-free bug in brcmf_netdev_start_xmit() [ Upstream commit 3f42faf6db431e04bf942d2ebe3ae88975723478
] > ret = brcmf_proto_tx_queue_data(drvr, ifp->ifidx, skb); may be schedule, and then complete before the line > ndev->stats.tx_bytes += skb->len; [ 46.912801] ================================================================== [ 46.920552] BUG: KASAN: use-after-free in brcmf_netdev_start_xmit+0x718/0x8c8 [brcmfmac] [ 46.928673] Read of size 4 at addr ffffff803f5882e8 by task systemd-resolve/328 [ 46.935991] [ 46.937514] CPU: 1 PID: 328 Comm: systemd-resolve Tainted: G O 5.4.199-[REDACTED] #1 [ 46.947255] Hardware name: [REDACTED] [ 46.954568] Call trace: [ 46.957037] dump_backtrace+0x0/0x2b8 [ 46.960719] show_stack+0x24/0x30 [ 46.964052] dump_stack+0x128/0x194 [ 46.967557] print_address_description.isra.0+0x64/0x380 [ 46.972877] __kasan_report+0x1d4/0x240 [ 46.976723] kasan_report+0xc/0x18 [ 46.980138] __asan_report_load4_noabort+0x18/0x20 [ 46.985027] brcmf_netdev_start_xmit+0x718/0x8c8 [brcmfmac] [ 46.990613] dev_hard_start_xmit+0x1bc/0xda0 [ 46.994894] sch_direct_xmit+0x198/0xd08 [ 46.998827] __qdisc_run+0x37c/0x1dc0 [ 47.002500] __dev_queue_xmit+0x1528/0x21f8 [ 47.006692] dev_queue_xmit+0x24/0x30 [ 47.010366] neigh_resolve_output+0x37c/0x678 [ 47.014734] ip_finish_output2+0x598/0x2458 [ 47.018927] __ip_finish_output+0x300/0x730 [ 47.023118] ip_output+0x2e0/0x430 [ 47.026530] ip_local_out+0x90/0x140 [ 47.030117] igmpv3_sendpack+0x14c/0x228 [ 47.034049] igmpv3_send_cr+0x384/0x6b8 [ 47.037895] igmp_ifc_timer_expire+0x4c/0x118 [ 47.042262] call_timer_fn+0x1cc/0xbe8 [ 47.046021] __run_timers+0x4d8/0xb28 [ 47.049693] run_timer_softirq+0x24/0x40 [ 47.053626] __do_softirq+0x2c0/0x117c [ 47.057387] irq_exit+0x2dc/0x388 [ 47.060715] __handle_domain_irq+0xb4/0x158 [ 47.064908] gic_handle_irq+0x58/0xb0 [ 47.068581] el0_irq_naked+0x50/0x5c [ 47.072162] [ 47.073665] Allocated by task 328: [ 47.077083] save_stack+0x24/0xb0 [ 47.080410] __kasan_kmalloc.isra.0+0xc0/0xe0 [ 47.084776] kasan_slab_alloc+0x14/0x20 [ 47.088622] kmem_cache_alloc+0x15c/0x468 [ 47.092643] 
__alloc_skb+0xa4/0x498 [ 47.096142] igmpv3_newpack+0x158/0xd78 [ 47.099987] add_grhead+0x210/0x288 [ 47.103485] add_grec+0x6b0/0xb70 [ 47.106811] igmpv3_send_cr+0x2e0/0x6b8 [ 47.110657] igmp_ifc_timer_expire+0x4c/0x118 [ 47.115027] call_timer_fn+0x1cc/0xbe8 [ 47.118785] __run_timers+0x4d8/0xb28 [ 47.122457] run_timer_softirq+0x24/0x40 [ 47.126389] __do_softirq+0x2c0/0x117c [ 47.130142] [ 47.131643] Freed by task 180: [ 47.134712] save_stack+0x24/0xb0 [ 47.138041] __kasan_slab_free+0x108/0x180 [ 47.142146] kasan_slab_free+0x10/0x18 [ 47.145904] slab_free_freelist_hook+0xa4/0x1b0 [ 47.150444] kmem_cache_free+0x8c/0x528 [ 47.154292] kfree_skbmem+0x94/0x108 [ 47.157880] consume_skb+0x10c/0x5a8 [ 47.161466] __dev_kfree_skb_any+0x88/0xa0 [ 47.165598] brcmu_pkt_buf_free_skb+0x44/0x68 [brcmutil] [ 47.171023] brcmf_txfinalize+0xec/0x190 [brcmfmac] [ 47.176016] brcmf_proto_bcdc_txcomplete+0x1c0/0x210 [brcmfmac] [ 47.182056] brcmf_sdio_sendfromq+0x8dc/0x1e80 [brcmfmac] [ 47.187568] brcmf_sdio_dpc+0xb48/0x2108 [brcmfmac] [ 47.192529] brcmf_sdio_dataworker+0xc8/0x238 [brcmfmac] [ 47.197859] process_one_work+0x7fc/0x1a80 [ 47.201965] worker_thread+0x31c/0xc40 [ 47.205726] kthread+0x2d8/0x370 [ 47.208967] ret_from_fork+0x10/0x18 [ 47.212546] [ 47.214051] The buggy address belongs to the object at ffffff803f588280 [ 47.214051] which belongs to the cache skbuff_head_cache of size 208 [ 47.227086] The buggy address is located 104 bytes inside of [ 47.227086] 208-byte region [ffffff803f588280, ffffff803f588350) [ 47.238814] The buggy address belongs to the page: [ 47.243618] page:ffffffff00dd6200 refcount:1 mapcount:0 mapping:ffffff804b6bf800 index:0xffffff803f589900 compound_mapcount: 0 [ 47.255007] flags: 0x10200(slab|head) [ 47.258689] raw: 0000000000010200 ffffffff00dfa980 0000000200000002 ffffff804b6bf800 [ 47.266439] raw: ffffff803f589900 0000000080190018 00000001ffffffff 0000000000000000 [ 47.274180] page dumped because: kasan: bad access detected [ 47.279752] [ 47.281251] 
Memory state around the buggy address: [ 47.286051] ffffff803f588180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 47.293277] ffffff803f588200: fb fb fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 47.300502] >ffffff803f588280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 47.307723] ^ [ 47.314343] ffffff803f588300: fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc fc [ 47.321569] ffffff803f588380: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb [ 47.328789] ================================================================== Signed-off-by: Alexander Coffin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220808174925.3922558-1-alex.coffin@matician.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 61039538a15b..c8e1d505f7b5 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -290,6 +290,7 @@ static netdev_tx_t brcmf_netdev_start_xmit(struct sk_buff *skb, struct brcmf_pub *drvr = ifp->drvr; struct ethhdr *eh; int head_delta; + unsigned int tx_bytes = skb->len; brcmf_dbg(DATA, "Enter, bsscfgidx=%d\n", ifp->bsscfgidx); @@ -364,7 +365,7 @@ done: ndev->stats.tx_dropped++; } else { ndev->stats.tx_packets++; - ndev->stats.tx_bytes += skb->len; + ndev->stats.tx_bytes += tx_bytes; } /* Return ok: we always eat the packet */ From b051d9bf98bd9cea312b228e264eb6542a9beb67 Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Fri, 9 Sep 2022 14:59:53 +0200 Subject: [PATCH 0311/1477] regulator: core: Prevent integer underflow [ Upstream commit 8d8e16592022c9650df8aedfe6552ed478d7135b ] By using a ratio of delay to poll_enabled_time that is not integer time_remaining underflows and does not exit the loop as expected. 
As delay could be derived from DT and poll_enabled_time is defined in the driver this can easily happen. Use a signed iterator to make sure that the loop exits once the remaining time is negative. Signed-off-by: Patrick Rudolph Link: https://lore.kernel.org/r/20220909125954.577669-1-patrick.rudolph@9elements.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 317d701487ec..bf8ba73d6c7c 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2544,7 +2544,7 @@ static int _regulator_do_enable(struct regulator_dev *rdev) * expired, return -ETIMEDOUT. */ if (rdev->desc->poll_enabled_time) { - unsigned int time_remaining = delay; + int time_remaining = delay; while (time_remaining > 0) { _regulator_enable_delay(rdev->desc->poll_enabled_time); From e25ca9af8a139422e347a3346bb9a03a10ae73f1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Sep 2022 00:32:56 +0900 Subject: [PATCH 0312/1477] Bluetooth: L2CAP: initialize delayed works at l2cap_chan_create() [ Upstream commit 2d2cb3066f2c90cd8ca540b36ba7a55e7f2406e0 ] syzbot is reporting cancel_delayed_work() without INIT_DELAYED_WORK() at l2cap_chan_del() [1], for CONF_NOT_COMPLETE flag (which meant to prevent l2cap_chan_del() from calling cancel_delayed_work()) is cleared by timer which fires before l2cap_chan_del() is called by closing file descriptor created by socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_L2CAP). l2cap_bredr_sig_cmd(L2CAP_CONF_REQ) and l2cap_bredr_sig_cmd(L2CAP_CONF_RSP) are calling l2cap_ertm_init(chan), and they call l2cap_chan_ready() (which clears CONF_NOT_COMPLETE flag) only when l2cap_ertm_init(chan) succeeded. l2cap_sock_init() does not call l2cap_ertm_init(chan), and it instead sets CONF_NOT_COMPLETE flag by calling l2cap_chan_set_defaults(). 
However, when connect() is requested, "command 0x0409 tx timeout" happens after 2 seconds from connect() request, and CONF_NOT_COMPLETE flag is cleared after 4 seconds from connect() request, for l2cap_conn_start() from l2cap_info_timeout() callback scheduled by schedule_delayed_work(&conn->info_timer, L2CAP_INFO_TIMEOUT); in l2cap_connect() is calling l2cap_chan_ready(). Fix this problem by initializing delayed works used by L2CAP_MODE_ERTM mode as soon as l2cap_chan_create() allocates a channel, like I did in commit be8597239379f0f5 ("Bluetooth: initialize skb_queue_head at l2cap_chan_create()"). Link: https://syzkaller.appspot.com/bug?extid=83672956c7aa6af698b3 [1] Reported-by: syzbot Signed-off-by: Tetsuo Handa Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/l2cap_core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0c38af2ff209..8d5029c81ee7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -61,6 +61,9 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err); static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event); +static void l2cap_retrans_timeout(struct work_struct *work); +static void l2cap_monitor_timeout(struct work_struct *work); +static void l2cap_ack_timeout(struct work_struct *work); static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type) { @@ -476,6 +479,9 @@ struct l2cap_chan *l2cap_chan_create(void) write_unlock(&chan_list_lock); INIT_DELAYED_WORK(&chan->chan_timer, l2cap_chan_timeout); + INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout); + INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout); + INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout); chan->state = BT_OPEN; @@ -3316,10 +3322,6 @@ int l2cap_ertm_init(struct l2cap_chan *chan) chan->rx_state = L2CAP_RX_STATE_RECV; chan->tx_state = 
L2CAP_TX_STATE_XMIT; - INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout); - INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout); - INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout); - skb_queue_head_init(&chan->srej_q); err = l2cap_seq_list_init(&chan->srej_list, chan->tx_win); From 7b674dce4162bb46d396586e30e4653427023875 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 19 Sep 2022 10:56:59 -0700 Subject: [PATCH 0313/1477] Bluetooth: hci_sysfs: Fix attempting to call device_add multiple times [ Upstream commit 448a496f760664d3e2e79466aa1787e6abc922b5 ] device_add shall not be called multiple times as stated in its documentation: 'Do not call this routine or device_register() more than once for any device structure' Syzkaller reports a bug as follows [1]: ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:33! invalid opcode: 0000 [#1] PREEMPT SMP KASAN [...] Call Trace: __list_add include/linux/list.h:69 [inline] list_add_tail include/linux/list.h:102 [inline] kobj_kset_join lib/kobject.c:164 [inline] kobject_add_internal+0x18f/0x8f0 lib/kobject.c:214 kobject_add_varg lib/kobject.c:358 [inline] kobject_add+0x150/0x1c0 lib/kobject.c:410 device_add+0x368/0x1e90 drivers/base/core.c:3452 hci_conn_add_sysfs+0x9b/0x1b0 net/bluetooth/hci_sysfs.c:53 hci_le_cis_estabilished_evt+0x57c/0xae0 net/bluetooth/hci_event.c:6799 hci_le_meta_evt+0x2b8/0x510 net/bluetooth/hci_event.c:7110 hci_event_func net/bluetooth/hci_event.c:7440 [inline] hci_event_packet+0x63d/0xfd0 net/bluetooth/hci_event.c:7495 hci_rx_work+0xae7/0x1230 net/bluetooth/hci_core.c:4007 process_one_work+0x991/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e4/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 Link: https://syzkaller.appspot.com/bug?id=da3246e2d33afdb92d66bc166a0934c5b146404a Signed-off-by: Luiz Augusto von Dentz Tested-by: Hawkins Jiawei Signed-off-by: Sasha Levin 
--- net/bluetooth/hci_sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index b69d88b88d2e..ccd2c377bf83 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -48,6 +48,9 @@ void hci_conn_add_sysfs(struct hci_conn *conn) BT_DBG("conn %p", conn); + if (device_is_registered(&conn->dev)) + return; + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); if (device_add(&conn->dev) < 0) { From 3d67986e72087733390e50936722a60dd4139639 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 15 Sep 2022 09:55:56 +0800 Subject: [PATCH 0314/1477] can: bcm: check the result of can_send() in bcm_can_tx() [ Upstream commit 3fd7bfd28cfd68ae80a2fe92ea1615722cc2ee6e ] If can_send() fail, it should not update frames_abs counter in bcm_can_tx(). Add the result check for can_send() in bcm_can_tx(). Suggested-by: Marc Kleine-Budde Suggested-by: Oliver Hartkopp Signed-off-by: Ziyang Xuan Link: https://lore.kernel.org/all/9851878e74d6d37aee2f1ee76d68361a46f89458.1663206163.git.william.xuanziyang@huawei.com Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- net/can/bcm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index e918a0f3cda2..afa82adaf6cd 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -274,6 +274,7 @@ static void bcm_can_tx(struct bcm_op *op) struct sk_buff *skb; struct net_device *dev; struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe; + int err; /* no target device? => exit */ if (!op->ifindex) @@ -298,11 +299,11 @@ static void bcm_can_tx(struct bcm_op *op) /* send with loopback */ skb->dev = dev; can_skb_set_owner(skb, op->sk); - can_send(skb, 1); + err = can_send(skb, 1); + if (!err) + op->frames_abs++; - /* update statistics */ op->currframe++; - op->frames_abs++; /* reached last frame? 
*/ if (op->currframe >= op->nframes) From 27ed98e8a9b053ef893e7191cbe1a73057928c57 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sat, 17 Sep 2022 21:28:29 +0100 Subject: [PATCH 0315/1477] wifi: rt2x00: don't run Rt5592 IQ calibration on MT7620 [ Upstream commit d3aad83d05aec0cfd7670cf0028f2ad4b81de92e ] The function rt2800_iq_calibrate is intended for Rt5592 only. Don't call it for MT7620 which has it's own calibration functions. Reported-by: Serge Vasilugin Signed-off-by: Daniel Golle Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/31a1c34ddbd296b82f38c18c9ae7339059215fdc.1663445157.git.daniel@makrotopia.org Signed-off-by: Sasha Levin --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index fed6d21cd6ce..3f2c10c2aaf8 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -4352,7 +4352,8 @@ static void rt2800_config_channel(struct rt2x00_dev *rt2x00dev, reg = (rf->channel <= 14 ? 0x1c : 0x24) + 2*rt2x00dev->lna_gain; rt2800_bbp_write_with_rx_chain(rt2x00dev, 66, reg); - rt2800_iq_calibrate(rt2x00dev, rf->channel); + if (rt2x00_rt(rt2x00dev, RT5592)) + rt2800_iq_calibrate(rt2x00dev, rf->channel); } bbp = rt2800_bbp_read(rt2x00dev, 4); From 8d9c00979a7e1607b4403378cf016ecc118cbeea Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sat, 17 Sep 2022 21:29:26 +0100 Subject: [PATCH 0316/1477] wifi: rt2x00: set correct TX_SW_CFG1 MAC register for MT7620 [ Upstream commit eeb50acf15762b61921f9df18663f839f387c054 ] Set correct TX_SW_CFG1 MAC register as it is done also in v3 of the vendor driver[1]. 
[1]: https://gitlab.com/dm38/padavan-ng/-/blob/master/trunk/proprietary/rt_wifi/rtpci/3.0.X.X/mt76x2/chips/rt6352.c#L531 Reported-by: Serge Vasilugin Signed-off-by: Daniel Golle Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/4be38975ce600a34249e12d09a3cb758c6e71071.1663445157.git.daniel@makrotopia.org Signed-off-by: Sasha Levin --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index 3f2c10c2aaf8..327f19cae4d7 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -5849,7 +5849,7 @@ static int rt2800_init_registers(struct rt2x00_dev *rt2x00dev) rt2800_register_write(rt2x00dev, TX_SW_CFG0, 0x00000404); } else if (rt2x00_rt(rt2x00dev, RT6352)) { rt2800_register_write(rt2x00dev, TX_SW_CFG0, 0x00000401); - rt2800_register_write(rt2x00dev, TX_SW_CFG1, 0x000C0000); + rt2800_register_write(rt2x00dev, TX_SW_CFG1, 0x000C0001); rt2800_register_write(rt2x00dev, TX_SW_CFG2, 0x00000000); rt2800_register_write(rt2x00dev, TX_ALC_VGA3, 0x00000000); rt2800_register_write(rt2x00dev, TX0_BB_GAIN_ATTEN, 0x0); From 5aa0461d11806599cf3b42bea1a3e68656bd0dd2 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sat, 17 Sep 2022 21:29:40 +0100 Subject: [PATCH 0317/1477] wifi: rt2x00: set VGC gain for both chains of MT7620 [ Upstream commit 0e09768c085709e10ece3b68f6ac921d3f6a9caa ] Set bbp66 for all chains of the MT7620. 
Reported-by: Serge Vasilugin Signed-off-by: Daniel Golle Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/29e161397e5c9d9399da0fe87d44458aa2b90a78.1663445157.git.daniel@makrotopia.org Signed-off-by: Sasha Levin --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index 327f19cae4d7..94e5c3c373ba 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -5626,7 +5626,8 @@ static inline void rt2800_set_vgc(struct rt2x00_dev *rt2x00dev, if (qual->vgc_level != vgc_level) { if (rt2x00_rt(rt2x00dev, RT3572) || rt2x00_rt(rt2x00dev, RT3593) || - rt2x00_rt(rt2x00dev, RT3883)) { + rt2x00_rt(rt2x00dev, RT3883) || + rt2x00_rt(rt2x00dev, RT6352)) { rt2800_bbp_write_with_rx_chain(rt2x00dev, 66, vgc_level); } else if (rt2x00_rt(rt2x00dev, RT5592)) { From a01614447954733a020ae15260d19f8e89a1ad1b Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sat, 17 Sep 2022 21:29:55 +0100 Subject: [PATCH 0318/1477] wifi: rt2x00: set SoC wmac clock register [ Upstream commit cbde6ed406a51092d9e8a2df058f5f8490f27443 ] Instead of using the default value 33 (pci), set US_CYC_CNT init based on Programming guide: If available, set chipset bus clock with fallback to cpu clock/3. 
Reported-by: Serge Vasilugin Signed-off-by: Daniel Golle Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/3e275d259f476f597dab91a9c395015ef3fe3284.1663445157.git.daniel@makrotopia.org Signed-off-by: Sasha Levin --- .../net/wireless/ralink/rt2x00/rt2800lib.c | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index 94e5c3c373ba..f237fc17dedc 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -6112,6 +6112,27 @@ static int rt2800_init_registers(struct rt2x00_dev *rt2x00dev) reg = rt2800_register_read(rt2x00dev, US_CYC_CNT); rt2x00_set_field32(&reg, US_CYC_CNT_CLOCK_CYCLE, 125); rt2800_register_write(rt2x00dev, US_CYC_CNT, reg); + } else if (rt2x00_is_soc(rt2x00dev)) { + struct clk *clk = clk_get_sys("bus", NULL); + int rate; + + if (IS_ERR(clk)) { + clk = clk_get_sys("cpu", NULL); + + if (IS_ERR(clk)) { + rate = 125; + } else { + rate = clk_get_rate(clk) / 3000000; + clk_put(clk); + } + } else { + rate = clk_get_rate(clk) / 1000000; + clk_put(clk); + } + + reg = rt2800_register_read(rt2x00dev, US_CYC_CNT); + rt2x00_set_field32(&reg, US_CYC_CNT_CLOCK_CYCLE, rate); + rt2800_register_write(rt2x00dev, US_CYC_CNT, reg); } reg = rt2800_register_read(rt2x00dev, HT_FBK_CFG0); From 4851303c853919f27310edec80181fd1c4185b41 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sat, 17 Sep 2022 21:30:09 +0100 Subject: [PATCH 0319/1477] wifi: rt2x00: correctly set BBP register 86 for MT7620 [ Upstream commit c9aada64fe6493461127f1522d7e2f01792d2424 ] Instead of 0 set the correct value for BBP register 86 for MT7620.
Reported-by: Serge Vasilugin Signed-off-by: Daniel Golle Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/257267247ee4fa7ebc6a5d0c4948b3f8119c0d77.1663445157.git.daniel@makrotopia.org Signed-off-by: Sasha Levin --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index f237fc17dedc..4bdd3a95f2d2 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -4151,7 +4151,10 @@ static void rt2800_config_channel(struct rt2x00_dev *rt2x00dev, rt2800_bbp_write(rt2x00dev, 62, 0x37 - rt2x00dev->lna_gain); rt2800_bbp_write(rt2x00dev, 63, 0x37 - rt2x00dev->lna_gain); rt2800_bbp_write(rt2x00dev, 64, 0x37 - rt2x00dev->lna_gain); - rt2800_bbp_write(rt2x00dev, 86, 0); + if (rt2x00_rt(rt2x00dev, RT6352)) + rt2800_bbp_write(rt2x00dev, 86, 0x38); + else + rt2800_bbp_write(rt2x00dev, 86, 0); } if (rf->channel <= 14) { From 65029aaedd15d9fe5ea1a899134e236d83f627bb Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Tue, 23 Aug 2022 21:37:54 +0800 Subject: [PATCH 0320/1477] net: If sock is dead don't access sock's sk_wq in sk_stream_wait_memory [ Upstream commit 3f8ef65af927db247418d4e1db49164d7a158fc5 ] Fixes the below NULL pointer dereference: [...] [ 14.471200] Call Trace: [ 14.471562] [ 14.471882] lock_acquire+0x245/0x2e0 [ 14.472416] ? remove_wait_queue+0x12/0x50 [ 14.473014] ? _raw_spin_lock_irqsave+0x17/0x50 [ 14.473681] _raw_spin_lock_irqsave+0x3d/0x50 [ 14.474318] ? remove_wait_queue+0x12/0x50 [ 14.474907] remove_wait_queue+0x12/0x50 [ 14.475480] sk_stream_wait_memory+0x20d/0x340 [ 14.476127] ? do_wait_intr_irq+0x80/0x80 [ 14.476704] do_tcp_sendpages+0x287/0x600 [ 14.477283] tcp_bpf_push+0xab/0x260 [ 14.477817] tcp_bpf_sendmsg_redir+0x297/0x500 [ 14.478461] ? 
__local_bh_enable_ip+0x77/0xe0 [ 14.479096] tcp_bpf_send_verdict+0x105/0x470 [ 14.479729] tcp_bpf_sendmsg+0x318/0x4f0 [ 14.480311] sock_sendmsg+0x2d/0x40 [ 14.480822] ____sys_sendmsg+0x1b4/0x1c0 [ 14.481390] ? copy_msghdr_from_user+0x62/0x80 [ 14.482048] ___sys_sendmsg+0x78/0xb0 [ 14.482580] ? vmf_insert_pfn_prot+0x91/0x150 [ 14.483215] ? __do_fault+0x2a/0x1a0 [ 14.483738] ? do_fault+0x15e/0x5d0 [ 14.484246] ? __handle_mm_fault+0x56b/0x1040 [ 14.484874] ? lock_is_held_type+0xdf/0x130 [ 14.485474] ? find_held_lock+0x2d/0x90 [ 14.486046] ? __sys_sendmsg+0x41/0x70 [ 14.486587] __sys_sendmsg+0x41/0x70 [ 14.487105] ? intel_pmu_drain_pebs_core+0x350/0x350 [ 14.487822] do_syscall_64+0x34/0x80 [ 14.488345] entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] The test scenario has the following flow: thread1 thread2 ----------- --------------- tcp_bpf_sendmsg tcp_bpf_send_verdict tcp_bpf_sendmsg_redir sock_close tcp_bpf_push_locked __sock_release tcp_bpf_push //inet_release do_tcp_sendpages sock->ops->release sk_stream_wait_memory // tcp_close sk_wait_event sk->sk_prot->close release_sock(__sk); *** lock_sock(sk); __tcp_close sock_orphan(sk) sk->sk_wq = NULL release_sock **** lock_sock(__sk); remove_wait_queue(sk_sleep(sk), &wait); sk_sleep(sk) //NULL pointer dereference &rcu_dereference_raw(sk->sk_wq)->wait While waiting for memory in thread1, the socket is released with its wait queue because thread2 has closed it. This caused by tcp_bpf_send_verdict didn't increase the f_count of psock->sk_redir->sk_socket->file in thread1. We should check if SOCK_DEAD flag is set on wakeup in sk_stream_wait_memory before accessing the wait queue. 
Suggested-by: Jakub Sitnicki Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Cc: Eric Dumazet Link: https://lore.kernel.org/bpf/20220823133755.314697-2-liujian56@huawei.com Signed-off-by: Sasha Levin --- net/core/stream.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/stream.c b/net/core/stream.c index a166a32b411f..a61130504827 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -159,7 +159,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) *timeo_p = current_timeo; } out: - remove_wait_queue(sk_sleep(sk), &wait); + if (!sock_flag(sk, SOCK_DEAD)) + remove_wait_queue(sk_sleep(sk), &wait); return err; do_error: From 0c108cf3ad386e0084277093b55a351c49e0be27 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 29 Sep 2022 13:27:13 -0700 Subject: [PATCH 0321/1477] Bluetooth: L2CAP: Fix user-after-free [ Upstream commit 35fcbc4243aad7e7d020b7c1dfb14bb888b20a4f ] This uses l2cap_chan_hold_unless_zero() after calling __l2cap_get_chan_blah() to prevent the following trace: Bluetooth: l2cap_core.c:static void l2cap_chan_destroy(struct kref *kref) Bluetooth: chan 0000000023c4974d Bluetooth: parent 00000000ae861c08 ================================================================== BUG: KASAN: use-after-free in __mutex_waiter_is_first kernel/locking/mutex.c:191 [inline] BUG: KASAN: use-after-free in __mutex_lock_common kernel/locking/mutex.c:671 [inline] BUG: KASAN: use-after-free in __mutex_lock+0x278/0x400 kernel/locking/mutex.c:729 Read of size 8 at addr ffff888006a49b08 by task kworker/u3:2/389 Link: https://lore.kernel.org/lkml/20220622082716.478486-1-lee.jones@linaro.org Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sungwoo Kim Signed-off-by: Sasha Levin --- net/bluetooth/l2cap_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 8d5029c81ee7..83dd76e9196f 100644 --- a/net/bluetooth/l2cap_core.c +++ 
b/net/bluetooth/l2cap_core.c @@ -4305,6 +4305,12 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, } } + chan = l2cap_chan_hold_unless_zero(chan); + if (!chan) { + err = -EBADSLT; + goto unlock; + } + err = 0; l2cap_chan_lock(chan); @@ -4334,6 +4340,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, } l2cap_chan_unlock(chan); + l2cap_chan_put(chan); unlock: mutex_unlock(&conn->chan_lock); From 484400d433ca1903a87268c55f019e932297538a Mon Sep 17 00:00:00 2001 From: Andrew Gaul Date: Sun, 2 Oct 2022 12:41:28 +0900 Subject: [PATCH 0322/1477] r8152: Rate limit overflow messages [ Upstream commit 93e2be344a7db169b7119de21ac1bf253b8c6907 ] My system shows almost 10 million of these messages over a 24-hour period which pollutes my logs. Signed-off-by: Andrew Gaul Link: https://lore.kernel.org/r/20221002034128.2026653-1-gaul@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0bb5b1c78654..a526242a3e36 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1689,7 +1689,9 @@ static void intr_callback(struct urb *urb) "Stop submitting intr, status %d\n", status); return; case -EOVERFLOW: - netif_info(tp, intr, tp->netdev, "intr status -EOVERFLOW\n"); + if (net_ratelimit()) + netif_info(tp, intr, tp->netdev, + "intr status -EOVERFLOW\n"); goto resubmit; /* -EPIPE: should clear the halt */ default: From 3339a51bcd89fce33011abf226b17c1c522b5e24 Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Tue, 5 Jul 2022 17:43:06 +0800 Subject: [PATCH 0323/1477] drm/nouveau/nouveau_bo: fix potential memory leak in nouveau_bo_alloc() [ Upstream commit 6dc548745d5b5102e3c53dc5097296ac270b6c69 ] nouveau_bo_alloc() allocates a memory chunk for "nvbo" with kzalloc(). When some error occurs, "nvbo" should be released. 
But when WARN_ON(pi < 0)) equals true, the function return ERR_PTR without releasing the "nvbo", which will lead to a memory leak. We should release the "nvbo" with kfree() if WARN_ON(pi < 0)) equals true. Signed-off-by: Jianglei Nie Signed-off-by: Lyude Paul Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20220705094306.2244103-1-niejianglei2021@163.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_bo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index b4946b595d86..b57dcad8865f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -279,8 +279,10 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, break; } - if (WARN_ON(pi < 0)) + if (WARN_ON(pi < 0)) { + kfree(nvbo); return ERR_PTR(-EINVAL); + } /* Disable compression if suitable settings couldn't be found. */ if (nvbo->comp && !vmm->page[pi].comp) { From e7d701800365d2dd3ef2252497dcaed9a2d2fada Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 5 Jul 2022 12:02:13 +0200 Subject: [PATCH 0324/1477] drm: Use size_t type for len variable in drm_copy_field() [ Upstream commit 94dc3471d1b2b58b3728558d0e3f264e9ce6ff59 ] The strlen() function returns a size_t which is an unsigned int on 32-bit arches and an unsigned long on 64-bit arches. But in the drm_copy_field() function, the strlen() return value is assigned to an 'int len' variable. Later, the len variable is passed as copy_from_user() third argument that is an unsigned long parameter as well. In theory, this can lead to an integer overflow via type conversion. Since the assignment happens to a signed int lvalue instead of a size_t lvalue. In practice though, that's unlikely since the values copied are set by DRM drivers and not controlled by userspace. But using a size_t for len is the correct thing to do anyways. 
Signed-off-by: Javier Martinez Canillas Tested-by: Peter Robinson Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220705100215.572498-2-javierm@redhat.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 4606cc938b36..a15d55d06510 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -473,7 +473,7 @@ EXPORT_SYMBOL(drm_invalid_op); */ static int drm_copy_field(char __user *buf, size_t *buf_len, const char *value) { - int len; + size_t len; /* don't overflow userbuf */ len = strlen(value); From c28a8082b25ce4ec94999e10a30c50d20bd44a25 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 5 Jul 2022 12:02:14 +0200 Subject: [PATCH 0325/1477] drm: Prevent drm_copy_field() to attempt copying a NULL pointer [ Upstream commit f6ee30407e883042482ad4ad30da5eaba47872ee ] There are some struct drm_driver fields that are required by drivers since drm_copy_field() attempts to copy them to user-space via DRM_IOCTL_VERSION. But it can be possible that a driver has a bug and did not set some of the fields, which leads to drm_copy_field() attempting to copy a NULL pointer: [ +10.395966] Unable to handle kernel access to user memory outside uaccess routines at virtual address 0000000000000000 [ +0.010955] Mem abort info: [ +0.002835] ESR = 0x0000000096000004 [ +0.003872] EC = 0x25: DABT (current EL), IL = 32 bits [ +0.005395] SET = 0, FnV = 0 [ +0.003113] EA = 0, S1PTW = 0 [ +0.003182] FSC = 0x04: level 0 translation fault [ +0.004964] Data abort info: [ +0.002919] ISV = 0, ISS = 0x00000004 [ +0.003886] CM = 0, WnR = 0 [ +0.003040] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000115dad000 [ +0.006536] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 [ +0.006925] Internal error: Oops: 96000004 [#1] SMP ... 
[ +0.011113] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ +0.007061] pc : __pi_strlen+0x14/0x150 [ +0.003895] lr : drm_copy_field+0x30/0x1a4 [ +0.004156] sp : ffff8000094b3a50 [ +0.003355] x29: ffff8000094b3a50 x28: ffff8000094b3b70 x27: 0000000000000040 [ +0.007242] x26: ffff443743c2ba00 x25: 0000000000000000 x24: 0000000000000040 [ +0.007243] x23: ffff443743c2ba00 x22: ffff8000094b3b70 x21: 0000000000000000 [ +0.007241] x20: 0000000000000000 x19: ffff8000094b3b90 x18: 0000000000000000 [ +0.007241] x17: 0000000000000000 x16: 0000000000000000 x15: 0000aaab14b9af40 [ +0.007241] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 [ +0.007239] x11: 0000000000000000 x10: 0000000000000000 x9 : ffffa524ad67d4d8 [ +0.007242] x8 : 0101010101010101 x7 : 7f7f7f7f7f7f7f7f x6 : 6c6e6263606e7141 [ +0.007239] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 [ +0.007241] x2 : 0000000000000000 x1 : ffff8000094b3b90 x0 : 0000000000000000 [ +0.007240] Call trace: [ +0.002475] __pi_strlen+0x14/0x150 [ +0.003537] drm_version+0x84/0xac [ +0.003448] drm_ioctl_kernel+0xa8/0x16c [ +0.003975] drm_ioctl+0x270/0x580 [ +0.003448] __arm64_sys_ioctl+0xb8/0xfc [ +0.003978] invoke_syscall+0x78/0x100 [ +0.003799] el0_svc_common.constprop.0+0x4c/0xf4 [ +0.004767] do_el0_svc+0x38/0x4c [ +0.003357] el0_svc+0x34/0x100 [ +0.003185] el0t_64_sync_handler+0x11c/0x150 [ +0.004418] el0t_64_sync+0x190/0x194 [ +0.003716] Code: 92402c04 b200c3e8 f13fc09f 5400088c (a9400c02) [ +0.006180] ---[ end trace 0000000000000000 ]--- Reported-by: Peter Robinson Signed-off-by: Javier Martinez Canillas Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220705100215.572498-3-javierm@redhat.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index a15d55d06510..c160a45a4274 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ 
b/drivers/gpu/drm/drm_ioctl.c @@ -475,6 +475,12 @@ static int drm_copy_field(char __user *buf, size_t *buf_len, const char *value) { size_t len; + /* don't attempt to copy a NULL pointer */ + if (WARN_ONCE(!value, "BUG: the value to copy was not set!")) { + *buf_len = 0; + return 0; + } + /* don't overflow userbuf */ len = strlen(value); if (len > *buf_len) From 3959e8faf8bf6bea619e8856c736db64e6eced37 Mon Sep 17 00:00:00 2001 From: Zeng Jingxiang Date: Wed, 27 Jul 2022 15:31:19 +0800 Subject: [PATCH 0326/1477] gpu: lontium-lt9611: Fix NULL pointer dereference in lt9611_connector_init() [ Upstream commit ef8886f321c5dab8124b9153d25afa2a71d05323 ] A NULL check for bridge->encoder shows that it may be NULL, but it already been dereferenced on all paths leading to the check. 812 if (!bridge->encoder) { Dereference the pointer bridge->encoder. 810 drm_connector_attach_encoder(&lt9611->connector, bridge->encoder); Signed-off-by: Zeng Jingxiang Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20220727073119.1578972-1-zengjx95@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/lontium-lt9611.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index 29b1ce2140ab..1dcc28a4d853 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -816,13 +816,14 @@ static int lt9611_connector_init(struct drm_bridge *bridge, struct lt9611 *lt961 drm_connector_helper_add(&lt9611->connector, &lt9611_bridge_connector_helper_funcs); - drm_connector_attach_encoder(&lt9611->connector, bridge->encoder); if (!bridge->encoder) { DRM_ERROR("Parent encoder object not found"); return -ENODEV; } + drm_connector_attach_encoder(&lt9611->connector, bridge->encoder); + return 0; } From 0a4fddc95c63fa5460f778885965281360fc3d90 Mon Sep 17 00:00:00 2001 From: David Gow Date: Thu, 11 Aug 2022 17:43:26 -0300 Subject: [PATCH 0327/1477]
drm/amd/display: fix overflow on MIN_I64 definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6ae0632d17759852c07e2d1e0a31c728eb6ba246 ] The definition of MIN_I64 in bw_fixed.c can cause gcc to whinge about integer overflow, because it is treated as a positive value, which is then negated. The temporary positive value is not necessarily representable. This causes the following warning: ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/calcs/bw_fixed.c:30:19: warning: integer overflow in expression ‘-9223372036854775808’ of type ‘long long int’ results in ‘-9223372036854775808’ [-Woverflow] 30 | (int64_t)(-(1LL << 63)) | ^ Writing out (-MAX_I64 - 1) works instead. Signed-off-by: David Gow Signed-off-by: Tales Aparecida Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c b/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c index 6ca288fb5fb9..2d46bc527b21 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c @@ -26,12 +26,12 @@ #include "bw_fixed.h" -#define MIN_I64 \ - (int64_t)(-(1LL << 63)) - #define MAX_I64 \ (int64_t)((1ULL << 63) - 1) +#define MIN_I64 \ + (-MAX_I64 - 1) + #define FRACTIONAL_PART_MASK \ ((1ULL << BW_FIXED_BITS_PER_FRACTIONAL_PART) - 1) From bbe2f6f90310b3a0b5de4e0dc022b36faabfd718 Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Wed, 24 Aug 2022 23:35:22 -0700 Subject: [PATCH 0328/1477] udmabuf: Set ubuf->sg = NULL if the creation of sg table fails [ Upstream commit d9c04a1b7a15b5e74b2977461d9511e497f05d8f ] When userspace tries to map the dmabuf and if for some reason (e.g. OOM) the creation of the sg table fails, ubuf->sg needs to be set to NULL. 
Otherwise, when the userspace subsequently closes the dmabuf fd, we'd try to erroneously free the invalid sg table from release_udmabuf resulting in the following crash reported by syzbot: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 0 PID: 3609 Comm: syz-executor487 Not tainted 5.19.0-syzkaller-13930-g7ebfc85e2cd7 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/22/2022 RIP: 0010:dma_unmap_sgtable include/linux/dma-mapping.h:378 [inline] RIP: 0010:put_sg_table drivers/dma-buf/udmabuf.c:89 [inline] RIP: 0010:release_udmabuf+0xcb/0x4f0 drivers/dma-buf/udmabuf.c:114 Code: 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 2b 04 00 00 48 8d 7d 0c 4c 8b 63 30 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 e2 RSP: 0018:ffffc900037efd30 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffffffff8cb67800 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff84ad27e0 RDI: 0000000000000000 RBP: fffffffffffffff4 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000000 R11: 000000000008c07c R12: ffff88801fa05000 R13: ffff888073db07e8 R14: ffff888025c25440 R15: 0000000000000000 FS: 0000555555fc4300(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc1c0ce06e4 CR3: 00000000715e6000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dma_buf_release+0x157/0x2d0 drivers/dma-buf/dma-buf.c:78 __dentry_kill+0x42b/0x640 fs/dcache.c:612 dentry_kill fs/dcache.c:733 [inline] dput+0x806/0xdb0 fs/dcache.c:913 __fput+0x39c/0x9d0 fs/file_table.c:333 task_work_run+0xdd/0x1a0 kernel/task_work.c:177 ptrace_notify+0x114/0x140 kernel/signal.c:2353 ptrace_report_syscall 
include/linux/ptrace.h:420 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:482 [inline] syscall_exit_work kernel/entry/common.c:249 [inline] syscall_exit_to_user_mode_prepare+0x129/0x280 kernel/entry/common.c:276 __syscall_exit_to_user_mode_work kernel/entry/common.c:281 [inline] syscall_exit_to_user_mode+0x9/0x50 kernel/entry/common.c:294 do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fc1c0c35b6b Code: 0f 05 48 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 83 ec 18 89 7c 24 0c e8 63 fc ff ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 a1 fc ff ff 8b 44 RSP: 002b:00007ffd78a06090 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000007 RCX: 00007fc1c0c35b6b RDX: 0000000020000280 RSI: 0000000040086200 RDI: 0000000000000006 RBP: 0000000000000007 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000293 R12: 000000000000000c R13: 0000000000000003 R14: 00007fc1c0cfe4a0 R15: 00007ffd78a06140 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:dma_unmap_sgtable include/linux/dma-mapping.h:378 [inline] RIP: 0010:put_sg_table drivers/dma-buf/udmabuf.c:89 [inline] RIP: 0010:release_udmabuf+0xcb/0x4f0 drivers/dma-buf/udmabuf.c:114 Reported-by: syzbot+c80e9ef5d8bb45894db0@syzkaller.appspotmail.com Cc: Gerd Hoffmann Signed-off-by: Vivek Kasireddy Link: http://patchwork.freedesktop.org/patch/msgid/20220825063522.801264-1-vivek.kasireddy@intel.com Signed-off-by: Gerd Hoffmann Signed-off-by: Sasha Levin --- drivers/dma-buf/udmabuf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index b624f3d8f0e6..e359c5c6c4df 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -118,17 +118,20 @@ static int begin_cpu_udmabuf(struct dma_buf *buf, { struct udmabuf *ubuf = buf->priv; struct device *dev = 
ubuf->device->this_device; + int ret = 0; if (!ubuf->sg) { ubuf->sg = get_sg_table(dev, buf, direction); - if (IS_ERR(ubuf->sg)) - return PTR_ERR(ubuf->sg); + if (IS_ERR(ubuf->sg)) { + ret = PTR_ERR(ubuf->sg); + ubuf->sg = NULL; + } } else { dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents, direction); } - return 0; + return ret; } static int end_cpu_udmabuf(struct dma_buf *buf, From b70f8abc1a440be6802f27631ecb9acb0a158c66 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 26 Aug 2022 20:57:33 +0200 Subject: [PATCH 0329/1477] drm: bridge: dw_hdmi: only trigger hotplug event on link change [ Upstream commit da09daf881082266e4075657fac53c7966de8e4d ] There are two events that signal a real change of the link state: HPD going high means the sink is newly connected or wants the source to re-read the EDID, RX sense going low is a indication that the link has been disconnected. Ignore the other two events that also trigger interrupts, but don't need immediate attention: HPD going low does not necessarily mean the link has been lost and should not trigger a immediate read of the status. RX sense going high also does not require a detect cycle, as HPD going high is the right point in time to read the EDID. 
Signed-off-by: Lucas Stach Reviewed-by: Neil Armstrong (v1) Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20220826185733.3213248-1-l.stach@pengutronix.de Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index b10228b9e3a9..356c7d0bd035 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2984,6 +2984,7 @@ static irqreturn_t dw_hdmi_irq(int irq, void *dev_id) { struct dw_hdmi *hdmi = dev_id; u8 intr_stat, phy_int_pol, phy_pol_mask, phy_stat; + enum drm_connector_status status = connector_status_unknown; intr_stat = hdmi_readb(hdmi, HDMI_IH_PHY_STAT0); phy_int_pol = hdmi_readb(hdmi, HDMI_PHY_POL0); @@ -3022,13 +3023,15 @@ static irqreturn_t dw_hdmi_irq(int irq, void *dev_id) cec_notifier_phys_addr_invalidate(hdmi->cec_notifier); mutex_unlock(&hdmi->cec_notifier_mutex); } + + if (phy_stat & HDMI_PHY_HPD) + status = connector_status_connected; + + if (!(phy_stat & (HDMI_PHY_HPD | HDMI_PHY_RX_SENSE))) + status = connector_status_disconnected; } - if (intr_stat & HDMI_IH_PHY_STAT0_HPD) { - enum drm_connector_status status = phy_int_pol & HDMI_PHY_HPD - ? connector_status_connected - : connector_status_disconnected; - + if (status != connector_status_unknown) { dev_dbg(hdmi->dev, "EVENT=%s\n", status == connector_status_connected ? 
"plugin" : "plugout"); From bfdb391d57df9aad8cb5ed241d71bf9655b40730 Mon Sep 17 00:00:00 2001 From: Mateusz Kwiatkowski Date: Mon, 29 Aug 2022 15:11:42 +0200 Subject: [PATCH 0330/1477] drm/vc4: vec: Fix timings for VEC modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 30d7565be96b3946c18a1ce3fd538f7946839092 ] This commit fixes vertical timings of the VEC (composite output) modes to accurately represent the 525-line ("NTSC") and 625-line ("PAL") ITU-R standards. Previous timings were actually defined as 502 and 601 lines, resulting in non-standard 62.69 Hz and 52 Hz signals being generated, respectively. Signed-off-by: Mateusz Kwiatkowski Acked-by: Noralf Trønnes Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220728-rpi-analog-tv-properties-v2-28-459522d653a7@cerno.tech Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_vec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c index bd5b8eb58b18..c6bd168a5898 100644 --- a/drivers/gpu/drm/vc4/vc4_vec.c +++ b/drivers/gpu/drm/vc4/vc4_vec.c @@ -257,7 +257,7 @@ static void vc4_vec_ntsc_j_mode_set(struct vc4_vec *vec) static const struct drm_display_mode ntsc_mode = { DRM_MODE("720x480", DRM_MODE_TYPE_DRIVER, 13500, 720, 720 + 14, 720 + 14 + 64, 720 + 14 + 64 + 60, 0, - 480, 480 + 3, 480 + 3 + 3, 480 + 3 + 3 + 16, 0, + 480, 480 + 7, 480 + 7 + 6, 525, 0, DRM_MODE_FLAG_INTERLACE) }; @@ -279,7 +279,7 @@ static void vc4_vec_pal_m_mode_set(struct vc4_vec *vec) static const struct drm_display_mode pal_mode = { DRM_MODE("720x576", DRM_MODE_TYPE_DRIVER, 13500, 720, 720 + 20, 720 + 20 + 64, 720 + 20 + 64 + 60, 0, - 576, 576 + 2, 576 + 2 + 3, 576 + 2 + 3 + 20, 0, + 576, 576 + 4, 576 + 4 + 6, 625, 0, DRM_MODE_FLAG_INTERLACE) }; From e29d20deaf9a457835df66b8c48e5bc4735db118 Mon Sep 17 00:00:00 2001 From: Maya Matuszczyk Date: Wed, 3 Aug 2022 20:24:03 
+0200 Subject: [PATCH 0331/1477] drm: panel-orientation-quirks: Add quirk for Anbernic Win600 [ Upstream commit 770e19076065e079a32f33eb11be2057c87f1cde ] This device is another x86 gaming handheld, and as (hopefully) there is only one set of DMI IDs it's using DMI_EXACT_MATCH Signed-off-by: Maya Matuszczyk Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20220803182402.1217293-1-maccraft123mc@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index f5ab891731d0..083273736c83 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -128,6 +128,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "One S1003"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Anbernic Win600 */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Anbernic"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Win600"), + }, + .driver_data = (void *)&lcd720x1280_rightside_up, }, { /* Asus T100HA */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), From a9d6a7c9b685fce285e14009fd725e61a280f542 Mon Sep 17 00:00:00 2001 From: Jameson Thies Date: Tue, 13 Sep 2022 20:49:54 +0000 Subject: [PATCH 0332/1477] platform/chrome: cros_ec: Notify the PM of wake events during resume [ Upstream commit 8edd2752b0aa498b3a61f3caee8f79f7e0567fad ] cros_ec_handle_event in the cros_ec driver can notify the PM of wake events. When a device is suspended, cros_ec_handle_event will not check MKBP events. Instead, received MKBP events are checked during resume by cros_ec_report_events_during_suspend. 
But cros_ec_report_events_during_suspend cannot notify the PM if received events are wake events, causing wake events to not be reported if received while the device is suspended. Update cros_ec_report_events_during_suspend to notify the PM of wake events during resume by calling pm_wakeup_event. Signed-off-by: Jameson Thies Reviewed-by: Prashant Malani Reviewed-by: Benson Leung Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20220913204954.2931042-1-jthies@google.com Signed-off-by: Sasha Levin --- drivers/platform/chrome/cros_ec.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/platform/chrome/cros_ec.c b/drivers/platform/chrome/cros_ec.c index c4de8c4db193..5a622666a075 100644 --- a/drivers/platform/chrome/cros_ec.c +++ b/drivers/platform/chrome/cros_ec.c @@ -332,10 +332,16 @@ EXPORT_SYMBOL(cros_ec_suspend); static void cros_ec_report_events_during_suspend(struct cros_ec_device *ec_dev) { + bool wake_event; + while (ec_dev->mkbp_event_supported && - cros_ec_get_next_event(ec_dev, NULL, NULL) > 0) + cros_ec_get_next_event(ec_dev, &wake_event, NULL) > 0) { blocking_notifier_call_chain(&ec_dev->event_notifier, 1, ec_dev); + + if (wake_event && device_may_wakeup(ec_dev->dev)) + pm_wakeup_event(ec_dev->dev, 0); + } } /** From 54f2585e2de09b64010df06dfe48488e32727c97 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 17 Sep 2022 23:04:07 +0200 Subject: [PATCH 0333/1477] platform/x86: msi-laptop: Change DMI match / alias strings to fix module autoloading [ Upstream commit 2a2565272a3628e45d61625e36ef17af7af4e3de ] On a MSI S270 with Fedora 37 x86_64 / systemd-251.4 the module does not properly autoload. This is likely caused by issues with how systemd-udevd handles the single quote char (') which is part of the sys_vendor / chassis_vendor strings on this laptop. As a workaround remove the single quote char + everything behind it from the sys_vendor + chassis_vendor matches. This fixes the module not autoloading. 
Link: https://github.com/systemd/systemd/issues/24715 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220917210407.647432-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/msi-laptop.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 3e935303b143..0e804b6c2d24 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -596,11 +596,10 @@ static const struct dmi_system_id msi_dmi_table[] __initconst = { { .ident = "MSI S270", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "MICRO-STAR INT'L CO.,LTD"), + DMI_MATCH(DMI_SYS_VENDOR, "MICRO-STAR INT"), DMI_MATCH(DMI_PRODUCT_NAME, "MS-1013"), DMI_MATCH(DMI_PRODUCT_VERSION, "0131"), - DMI_MATCH(DMI_CHASSIS_VENDOR, - "MICRO-STAR INT'L CO.,LTD") + DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INT") }, .driver_data = &quirk_old_ec_model, .callback = dmi_check_cb @@ -633,8 +632,7 @@ static const struct dmi_system_id msi_dmi_table[] __initconst = { DMI_MATCH(DMI_SYS_VENDOR, "NOTEBOOK"), DMI_MATCH(DMI_PRODUCT_NAME, "SAM2000"), DMI_MATCH(DMI_PRODUCT_VERSION, "0131"), - DMI_MATCH(DMI_CHASSIS_VENDOR, - "MICRO-STAR INT'L CO.,LTD") + DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INT") }, .driver_data = &quirk_old_ec_model, .callback = dmi_check_cb From 69130888b226e1156fcd2a35bd7c1ca33e4ac516 Mon Sep 17 00:00:00 2001 From: Jairaj Arava Date: Mon, 19 Sep 2022 13:44:29 +0200 Subject: [PATCH 0334/1477] ASoC: SOF: pci: Change DMI match info to support all Chrome platforms [ Upstream commit c1c1fc8103f794a10c5c15e3c17879caf4f42c8f ] In some Chrome platforms if OEM's use their own string as SYS_VENDOR than "Google", it leads to firmware load failure from intel/sof/community path. Hence, changing SYS_VENDOR to PRODUCT_FAMILY in which "Google" is used as common prefix and is supported in all Chrome platforms. 
Reviewed-by: Ranjani Sridharan Reviewed-by: Chao Song Reviewed-by: Curtis Malainey Signed-off-by: Jairaj Arava Signed-off-by: Curtis Malainey Signed-off-by: Sathyanarayana Nujella Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220919114429.42700-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/sof-pci-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index 75657a25dbc0..fe9feaab6a0a 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -75,7 +75,7 @@ static const struct dmi_system_id community_key_platforms[] = { { .ident = "Google Chromebooks", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Google"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "Google"), } }, {}, From 1c7d957c5d81930b5cf514c0f42150796c1823e0 Mon Sep 17 00:00:00 2001 From: hongao Date: Tue, 20 Sep 2022 17:24:53 +0800 Subject: [PATCH 0335/1477] drm/amdgpu: fix initial connector audio value [ Upstream commit 4bb71fce58f30df3f251118291d6b0187ce531e6 ] This got lost somewhere along the way, This fixes audio not working until set_property was called. 
Signed-off-by: hongao Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index df1f9b88a53f..98d3661336a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1671,10 +1671,12 @@ amdgpu_connector_add(struct amdgpu_device *adev, adev->mode_info.dither_property, AMDGPU_FMT_DITHER_DISABLE); - if (amdgpu_audio != 0) + if (amdgpu_audio != 0) { drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; + } subpixel_order = SubPixelHorizontalRGB; connector->interlace_allowed = true; @@ -1796,6 +1798,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; } drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.dither_property, @@ -1849,6 +1852,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; } drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.dither_property, @@ -1899,6 +1903,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; } drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.dither_property, From 8a427a22839daacd36531a62c83d5c9cd6f20657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Larumbe?= Date: Mon, 19 Sep 2022 02:09:39 +0100 Subject: [PATCH 0336/1477] 
drm/meson: explicitly remove aggregate driver at module unload time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8616f2a0589a80e08434212324250eb22f6a66ce ] Because component_master_del wasn't being called when unloading the meson_drm module, the aggregate device would linger forever in the global aggregate_devices list. That means when unloading and reloading the meson_dw_hdmi module, component_add would call into try_to_bring_up_aggregate_device and find the unbound meson_drm aggregate device. This would in turn dereference some of the aggregate_device's struct entries which point to memory automatically freed by the devres API when unbinding the aggregate device from meson_drv_unbind, and trigger an use-after-free bug: [ +0.000014] ============================================================= [ +0.000007] BUG: KASAN: use-after-free in find_components+0x468/0x500 [ +0.000017] Read of size 8 at addr ffff000006731688 by task modprobe/2536 [ +0.000018] CPU: 4 PID: 2536 Comm: modprobe Tainted: G C O 5.19.0-rc6-lrmbkasan+ #1 [ +0.000010] Hardware name: Hardkernel ODROID-N2Plus (DT) [ +0.000008] Call trace: [ +0.000005] dump_backtrace+0x1ec/0x280 [ +0.000011] show_stack+0x24/0x80 [ +0.000007] dump_stack_lvl+0x98/0xd4 [ +0.000010] print_address_description.constprop.0+0x80/0x520 [ +0.000011] print_report+0x128/0x260 [ +0.000007] kasan_report+0xb8/0xfc [ +0.000007] __asan_report_load8_noabort+0x3c/0x50 [ +0.000009] find_components+0x468/0x500 [ +0.000008] try_to_bring_up_aggregate_device+0x64/0x390 [ +0.000009] __component_add+0x1dc/0x49c [ +0.000009] component_add+0x20/0x30 [ +0.000008] meson_dw_hdmi_probe+0x28/0x34 [meson_dw_hdmi] [ +0.000013] platform_probe+0xd0/0x220 [ +0.000008] really_probe+0x3ac/0xa80 [ +0.000008] __driver_probe_device+0x1f8/0x400 [ +0.000008] driver_probe_device+0x68/0x1b0 [ +0.000008] __driver_attach+0x20c/0x480 [ +0.000009] bus_for_each_dev+0x114/0x1b0 [ +0.000007] 
driver_attach+0x48/0x64 [ +0.000009] bus_add_driver+0x390/0x564 [ +0.000007] driver_register+0x1a8/0x3e4 [ +0.000009] __platform_driver_register+0x6c/0x94 [ +0.000007] meson_dw_hdmi_platform_driver_init+0x30/0x1000 [meson_dw_hdmi] [ +0.000014] do_one_initcall+0xc4/0x2b0 [ +0.000008] do_init_module+0x154/0x570 [ +0.000010] load_module+0x1a78/0x1ea4 [ +0.000008] __do_sys_init_module+0x184/0x1cc [ +0.000008] __arm64_sys_init_module+0x78/0xb0 [ +0.000008] invoke_syscall+0x74/0x260 [ +0.000008] el0_svc_common.constprop.0+0xcc/0x260 [ +0.000009] do_el0_svc+0x50/0x70 [ +0.000008] el0_svc+0x68/0x1a0 [ +0.000009] el0t_64_sync_handler+0x11c/0x150 [ +0.000009] el0t_64_sync+0x18c/0x190 [ +0.000014] Allocated by task 902: [ +0.000007] kasan_save_stack+0x2c/0x5c [ +0.000009] __kasan_kmalloc+0x90/0xd0 [ +0.000007] __kmalloc_node+0x240/0x580 [ +0.000010] memcg_alloc_slab_cgroups+0xa4/0x1ac [ +0.000010] memcg_slab_post_alloc_hook+0xbc/0x4c0 [ +0.000008] kmem_cache_alloc_node+0x1d0/0x490 [ +0.000009] __alloc_skb+0x1d4/0x310 [ +0.000010] alloc_skb_with_frags+0x8c/0x620 [ +0.000008] sock_alloc_send_pskb+0x5ac/0x6d0 [ +0.000010] unix_dgram_sendmsg+0x2e0/0x12f0 [ +0.000010] sock_sendmsg+0xcc/0x110 [ +0.000007] sock_write_iter+0x1d0/0x304 [ +0.000008] new_sync_write+0x364/0x460 [ +0.000007] vfs_write+0x420/0x5ac [ +0.000008] ksys_write+0x19c/0x1f0 [ +0.000008] __arm64_sys_write+0x78/0xb0 [ +0.000007] invoke_syscall+0x74/0x260 [ +0.000008] el0_svc_common.constprop.0+0x1a8/0x260 [ +0.000009] do_el0_svc+0x50/0x70 [ +0.000007] el0_svc+0x68/0x1a0 [ +0.000008] el0t_64_sync_handler+0x11c/0x150 [ +0.000008] el0t_64_sync+0x18c/0x190 [ +0.000013] Freed by task 2509: [ +0.000008] kasan_save_stack+0x2c/0x5c [ +0.000007] kasan_set_track+0x2c/0x40 [ +0.000008] kasan_set_free_info+0x28/0x50 [ +0.000008] ____kasan_slab_free+0x128/0x1d4 [ +0.000008] __kasan_slab_free+0x18/0x24 [ +0.000007] slab_free_freelist_hook+0x108/0x230 [ +0.000010] kfree+0x110/0x35c [ +0.000008] release_nodes+0xf0/0x16c [ 
+0.000008] devres_release_all+0xfc/0x180 [ +0.000008] device_unbind_cleanup+0x24/0x164 [ +0.000008] device_release_driver_internal+0x3e8/0x5b0 [ +0.000010] driver_detach+0xac/0x1b0 [ +0.000008] bus_remove_driver+0x158/0x29c [ +0.000008] driver_unregister+0x70/0xb0 [ +0.000009] platform_driver_unregister+0x20/0x2c [ +0.000007] 0xffff800003722d98 [ +0.000012] __do_sys_delete_module+0x288/0x400 [ +0.000009] __arm64_sys_delete_module+0x5c/0x80 [ +0.000008] invoke_syscall+0x74/0x260 [ +0.000008] el0_svc_common.constprop.0+0xcc/0x260 [ +0.000008] do_el0_svc+0x50/0x70 [ +0.000007] el0_svc+0x68/0x1a0 [ +0.000008] el0t_64_sync_handler+0x11c/0x150 [ +0.000009] el0t_64_sync+0x18c/0x190 [ +0.000013] Last potentially related work creation: [ +0.000007] kasan_save_stack+0x2c/0x5c [ +0.000007] __kasan_record_aux_stack+0xb8/0xf0 [ +0.000009] kasan_record_aux_stack_noalloc+0x14/0x20 [ +0.000008] insert_work+0x54/0x290 [ +0.000009] __queue_work+0x48c/0xd24 [ +0.000008] queue_work_on+0x90/0x11c [ +0.000008] call_usermodehelper_exec+0x188/0x404 [ +0.000010] kobject_uevent_env+0x5a8/0x794 [ +0.000010] kobject_uevent+0x14/0x20 [ +0.000008] driver_register+0x230/0x3e4 [ +0.000009] __platform_driver_register+0x6c/0x94 [ +0.000007] gxbb_driver_init+0x28/0x34 [ +0.000010] do_one_initcall+0xc4/0x2b0 [ +0.000008] do_initcalls+0x20c/0x24c [ +0.000010] kernel_init_freeable+0x22c/0x278 [ +0.000009] kernel_init+0x3c/0x170 [ +0.000008] ret_from_fork+0x10/0x20 [ +0.000013] The buggy address belongs to the object at ffff000006731600 which belongs to the cache kmalloc-256 of size 256 [ +0.000009] The buggy address is located 136 bytes inside of 256-byte region [ffff000006731600, ffff000006731700) [ +0.000015] The buggy address belongs to the physical page: [ +0.000008] page:fffffc000019cc00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff000006730a00 pfn:0x6730 [ +0.000011] head:fffffc000019cc00 order:2 compound_mapcount:0 compound_pincount:0 [ +0.000008] flags: 
0xffff00000010200(slab|head|node=0|zone=0|lastcpupid=0xffff) [ +0.000016] raw: 0ffff00000010200 fffffc00000c3d08 fffffc0000ef2b08 ffff000000002680 [ +0.000009] raw: ffff000006730a00 0000000000150014 00000001ffffffff 0000000000000000 [ +0.000006] page dumped because: kasan: bad access detected [ +0.000011] Memory state around the buggy address: [ +0.000007] ffff000006731580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ +0.000007] ffff000006731600: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000007] >ffff000006731680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000007] ^ [ +0.000006] ffff000006731700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ +0.000007] ffff000006731780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ +0.000006] ================================================================== Fix by adding 'remove' driver callback for meson-drm, and explicitly deleting the aggregate device. Signed-off-by: Adrián Larumbe Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20220919010940.419893-3-adrian.larumbe@collabora.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 2d022f3fb437..b0bfe85f5f6a 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -528,6 +528,13 @@ static int meson_drv_probe(struct platform_device *pdev) return 0; }; +static int meson_drv_remove(struct platform_device *pdev) +{ + component_master_del(&pdev->dev, &meson_drv_master_ops); + + return 0; +} + static struct meson_drm_match_data meson_drm_gxbb_data = { .compat = VPU_COMPATIBLE_GXBB, }; @@ -565,6 +572,7 @@ static const struct dev_pm_ops meson_drv_pm_ops = { static struct platform_driver meson_drm_platform_driver = { .probe = meson_drv_probe, + .remove = meson_drv_remove, .shutdown = meson_drv_shutdown, .driver = { 
.name = "meson-drm", From bb91c06b0be4637f304d5c2d9fbcb90fa4f1dbd9 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Thu, 22 Sep 2022 21:43:22 -0400 Subject: [PATCH 0337/1477] mmc: sdhci-msm: add compatible string check for sdm670 [ Upstream commit 4de95950d970c71a9e82a24573bb7a44fd95baa1 ] The Snapdragon 670 has the same quirk as Snapdragon 845 (needing to restore the dll config). Add a compatible string check to detect the need for this. Signed-off-by: Richard Acayan Reviewed-by: Bhupesh Sharma Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220923014322.33620-3-mailingradian@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-msm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 192cb8b20b47..ad2e73f9a58f 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -2182,6 +2182,7 @@ static const struct sdhci_msm_variant_info sm8250_sdhci_var = { static const struct of_device_id sdhci_msm_dt_match[] = { {.compatible = "qcom,sdhci-msm-v4", .data = &sdhci_msm_mci_var}, {.compatible = "qcom,sdhci-msm-v5", .data = &sdhci_msm_v5_var}, + {.compatible = "qcom,sdm670-sdhci", .data = &sdm845_sdhci_var}, {.compatible = "qcom,sdm845-sdhci", .data = &sdm845_sdhci_var}, {.compatible = "qcom,sm8250-sdhci", .data = &sm8250_sdhci_var}, {.compatible = "qcom,sc7180-sdhci", .data = &sdm845_sdhci_var}, From 1bb6f4a8db5ae91194973f98e0b43d5dde3889c4 Mon Sep 17 00:00:00 2001 From: Khaled Almahallawy Date: Thu, 15 Sep 2022 22:49:00 -0700 Subject: [PATCH 0338/1477] drm/dp: Don't rewrite link config when setting phy test pattern [ Upstream commit 7b4d8db657192066bc6f1f6635d348413dac1e18 ] The sequence for Source DP PHY CTS automation is [2][1]: 1- Emulate successful Link Training(LT) 2- Short HPD and change link rates and number of lanes by LT. 
(This is same flow for Link Layer CTS) 3- Short HPD and change PHY test pattern and swing/pre-emphasis levels (This step should not trigger LT) The problem is with DP PHY compliance setup as follows: [DPTX + on board LTTPR]------Main Link--->[Scope] ^ | | | | | ----------Aux Ch------>[Aux Emulator] At step 3, before writing TRAINING_LANEx_SET/LINK_QUAL_PATTERN_SET to declare the pattern/swing requested by scope, we write link config in LINK_BW_SET/LANE_COUNT_SET on a port that has LTTPR. As LTTPR snoops aux transaction, LINK_BW_SET/LANE_COUNT_SET writes indicate a LT will start [Check DP 2.0 E11 -Sec 3.6.8.2 & 3.6.8.6.3], and LTTPR will reset the link and stop sending DP signals to DPTX/Scope causing the measurements to fail. Note that step 3 will not trigger LT and DP link will never be recovered by the Aux Emulator/Scope. The reset of link can be tested with a monitor connected to LTTPR port simply by writing to LINK_BW_SET or LANE_COUNT_SET as follows igt/tools/dpcd_reg write --offset=0x100 --value 0x14 --device=2 OR printf '\x14' | sudo dd of=/dev/drm_dp_aux2 bs=1 count=1 conv=notrunc seek=$((0x100)) This single aux write causes the screen to blank, sending short HPD to DPTX, setting LINK_STATUS_UPDATE = 1 in DPCD 0x204, and triggering LT. As stated in [1]: "Before any TX electrical testing can be performed, the link between a DPTX and DPRX (in this case, a piece of test equipment), including all LTTPRs within the path, shall be trained as defined in this Standard." In addition, changing Phy pattern/Swing/Pre-emphasis (Step 3) uses the same link rate and lane count applied on step 2, so no need to redo LT.
The fix is to not rewrite link config in step 3, and just writes TRAINING_LANEx_SET and LINK_QUAL_PATTERN_SET [1]: DP 2.0 E11 - 3.6.11.1 LTTPR DPTX_PHY Electrical Compliance [2]: Configuring UnigrafDPTC Controller - Automation Test Sequence https://www.keysight.com/us/en/assets/9922-01244/help-files/ D9040DPPC-DisplayPort-Test-Software-Online-Help-latest.chm Cc: Imre Deak Cc: Jani Nikula Cc: Or Cochvi Signed-off-by: Khaled Almahallawy Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220916054900.415804-1-khaled.almahallawy@intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_helper.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 3c55753bab16..6ba16db77500 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -2172,17 +2172,8 @@ int drm_dp_set_phy_test_pattern(struct drm_dp_aux *aux, struct drm_dp_phy_test_params *data, u8 dp_rev) { int err, i; - u8 link_config[2]; u8 test_pattern; - link_config[0] = drm_dp_link_rate_to_bw_code(data->link_rate); - link_config[1] = data->num_lanes; - if (data->enhanced_frame_cap) - link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; - err = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, link_config, 2); - if (err < 0) - return err; - test_pattern = data->phy_pattern; if (dp_rev < 0x12) { test_pattern = (test_pattern << 2) & From 166feb964fc8d9065eae8301b08e39a9fdbebcda Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Fri, 9 Sep 2022 18:07:59 -0400 Subject: [PATCH 0339/1477] drm/amd/display: Remove interface for periodic interrupt 1 [ Upstream commit 97d8d6f075bd8f988589be02b91f6fa644d0b0b8 ] [why] Only a single VLINE interrupt is available so interface should not expose the second one which is used by DMU firmware. [how] Remove references to periodic_interrupt1 and VLINE1 from DC interfaces. 
Reviewed-by: Jaehyun Chung Acked-by: Jasdeep Dhillon Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc.c | 16 +++------ drivers/gpu/drm/amd/display/dc/dc_stream.h | 6 ++-- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 35 ++++++------------- .../amd/display/dc/dcn10/dcn10_hw_sequencer.h | 3 +- .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 8 +---- 5 files changed, 18 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 93f5229c303e..99887bcfada0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2202,11 +2202,8 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->abm_level) stream->abm_level = *update->abm_level; - if (update->periodic_interrupt0) - stream->periodic_interrupt0 = *update->periodic_interrupt0; - - if (update->periodic_interrupt1) - stream->periodic_interrupt1 = *update->periodic_interrupt1; + if (update->periodic_interrupt) + stream->periodic_interrupt = *update->periodic_interrupt; if (update->gamut_remap) stream->gamut_remap_matrix = *update->gamut_remap; @@ -2288,13 +2285,8 @@ static void commit_planes_do_stream_update(struct dc *dc, if (!pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe && pipe_ctx->stream == stream) { - if (stream_update->periodic_interrupt0 && - dc->hwss.setup_periodic_interrupt) - dc->hwss.setup_periodic_interrupt(dc, pipe_ctx, VLINE0); - - if (stream_update->periodic_interrupt1 && - dc->hwss.setup_periodic_interrupt) - dc->hwss.setup_periodic_interrupt(dc, pipe_ctx, VLINE1); + if (stream_update->periodic_interrupt && dc->hwss.setup_periodic_interrupt) + dc->hwss.setup_periodic_interrupt(dc, pipe_ctx); if ((stream_update->hdr_static_metadata && !stream->use_dynamic_meta) || stream_update->vrr_infopacket || diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h 
b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 205bedd1b196..0487c1b8957c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -179,8 +179,7 @@ struct dc_stream_state { /* DMCU info */ unsigned int abm_level; - struct periodic_interrupt_config periodic_interrupt0; - struct periodic_interrupt_config periodic_interrupt1; + struct periodic_interrupt_config periodic_interrupt; /* from core_stream struct */ struct dc_context *ctx; @@ -244,8 +243,7 @@ struct dc_stream_update { struct dc_info_packet *hdr_static_metadata; unsigned int *abm_level; - struct periodic_interrupt_config *periodic_interrupt0; - struct periodic_interrupt_config *periodic_interrupt1; + struct periodic_interrupt_config *periodic_interrupt; struct dc_info_packet *vrr_infopacket; struct dc_info_packet *vsc_infopacket; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 31a13daf4289..71a85c5306ed 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -3611,7 +3611,7 @@ void dcn10_calc_vupdate_position( { const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing; int vline_int_offset_from_vupdate = - pipe_ctx->stream->periodic_interrupt0.lines_offset; + pipe_ctx->stream->periodic_interrupt.lines_offset; int vupdate_offset_from_vsync = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx); int start_position; @@ -3636,18 +3636,10 @@ void dcn10_calc_vupdate_position( static void dcn10_cal_vline_position( struct dc *dc, struct pipe_ctx *pipe_ctx, - enum vline_select vline, uint32_t *start_line, uint32_t *end_line) { - enum vertical_interrupt_ref_point ref_point = INVALID_POINT; - - if (vline == VLINE0) - ref_point = pipe_ctx->stream->periodic_interrupt0.ref_point; - else if (vline == VLINE1) - ref_point = pipe_ctx->stream->periodic_interrupt1.ref_point; - - switch 
(ref_point) { + switch (pipe_ctx->stream->periodic_interrupt.ref_point) { case START_V_UPDATE: dcn10_calc_vupdate_position( dc, @@ -3656,7 +3648,9 @@ static void dcn10_cal_vline_position( end_line); break; case START_V_SYNC: - // Suppose to do nothing because vsync is 0; + // vsync is line 0 so start_line is just the requested line offset + *start_line = pipe_ctx->stream->periodic_interrupt.lines_offset; + *end_line = *start_line + 2; break; default: ASSERT(0); @@ -3666,24 +3660,15 @@ static void dcn10_cal_vline_position( void dcn10_setup_periodic_interrupt( struct dc *dc, - struct pipe_ctx *pipe_ctx, - enum vline_select vline) + struct pipe_ctx *pipe_ctx) { struct timing_generator *tg = pipe_ctx->stream_res.tg; + uint32_t start_line = 0; + uint32_t end_line = 0; - if (vline == VLINE0) { - uint32_t start_line = 0; - uint32_t end_line = 0; + dcn10_cal_vline_position(dc, pipe_ctx, &start_line, &end_line); - dcn10_cal_vline_position(dc, pipe_ctx, vline, &start_line, &end_line); - - tg->funcs->setup_vertical_interrupt0(tg, start_line, end_line); - - } else if (vline == VLINE1) { - pipe_ctx->stream_res.tg->funcs->setup_vertical_interrupt1( - tg, - pipe_ctx->stream->periodic_interrupt1.lines_offset); - } + tg->funcs->setup_vertical_interrupt0(tg, start_line, end_line); } void dcn10_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index e5691e499023..81b5057d5ff1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -174,8 +174,7 @@ void dcn10_set_cursor_attribute(struct pipe_ctx *pipe_ctx); void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx); void dcn10_setup_periodic_interrupt( struct dc *dc, - struct pipe_ctx *pipe_ctx, - enum vline_select vline); + struct pipe_ctx *pipe_ctx); enum dc_status dcn10_set_clock(struct dc *dc, enum 
dc_clock_type clock_type, uint32_t clk_khz, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 64c1be818b0e..3165a66c5362 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -32,11 +32,6 @@ #include "inc/hw/link_encoder.h" #include "core_status.h" -enum vline_select { - VLINE0, - VLINE1 -}; - struct pipe_ctx; struct dc_state; struct dc_stream_status; @@ -112,8 +107,7 @@ struct hw_sequencer_funcs { int group_index, int group_size, struct pipe_ctx *grouped_pipes[]); void (*setup_periodic_interrupt)(struct dc *dc, - struct pipe_ctx *pipe_ctx, - enum vline_select vline); + struct pipe_ctx *pipe_ctx); void (*set_drr)(struct pipe_ctx **pipe_ctx, int num_pipes, unsigned int vmin, unsigned int vmax, unsigned int vmid, unsigned int vmid_frame_number); From 82e0d91484f79475b59a586be8815098a0e7b1e2 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 25 Jul 2022 18:16:22 +0800 Subject: [PATCH 0340/1477] ARM: dts: imx7d-sdb: config the max pressure for tsc2046 [ Upstream commit e7c4ebe2f9cd68588eb24ba4ed122e696e2d5272 ] Use the general touchscreen method to config the max pressure for touch tsc2046(data sheet suggest 8 bit pressure), otherwise, for ABS_PRESSURE, when config the same max and min value, weston will meet the following issue, [17:19:39.183] event1 - ADS7846 Touchscreen: is tagged by udev as: Touchscreen [17:19:39.183] event1 - ADS7846 Touchscreen: kernel bug: device has min == max on ABS_PRESSURE [17:19:39.183] event1 - ADS7846 Touchscreen: was rejected [17:19:39.183] event1 - not using input device '/dev/input/event1' This will then cause the APP weston-touch-calibrator can't list touch devices. root@imx6ul7d:~# weston-touch-calibrator could not load cursor 'dnd-move' could not load cursor 'dnd-copy' could not load cursor 'dnd-none' No devices listed. 
And according to the binding doc, "ti,x-max", "ti,y-max", "ti,pressure-max" belong to the deprecated properties, so remove them. Also for "ti,x-min", "ti,y-min", "ti,x-plate-ohms", the values set in the dts are equal to the default values in the driver, so they are redundant; remove them here as well. Signed-off-by: Haibo Chen Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx7d-sdb.dts | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 6823b9f1a2a3..6d562ebe9029 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -199,12 +199,7 @@ interrupt-parent = <&gpio2>; interrupts = <29 0>; pendown-gpio = <&gpio2 29 GPIO_ACTIVE_HIGH>; - ti,x-min = /bits/ 16 <0>; - ti,x-max = /bits/ 16 <0>; - ti,y-min = /bits/ 16 <0>; - ti,y-max = /bits/ 16 <0>; - ti,pressure-max = /bits/ 16 <0>; - ti,x-plate-ohms = /bits/ 16 <400>; + touchscreen-max-pressure = <255>; wakeup-source; }; }; From 2763a3b43ac3a71a73944b9f2ce472931d152a1e Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 26 Aug 2022 07:53:31 +0200 Subject: [PATCH 0341/1477] ARM: dts: imx6q: add missing properties for sram [ Upstream commit b11d083c5dcec7c42fe982c854706d404ddd3a5f ] All 3 properties are required by sram.yaml. 
Fixes the dtbs_check warning: sram@900000: '#address-cells' is a required property sram@900000: '#size-cells' is a required property sram@900000: 'ranges' is a required property Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6q.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi index 5277e3903291..afec1677e6ba 100644 --- a/arch/arm/boot/dts/imx6q.dtsi +++ b/arch/arm/boot/dts/imx6q.dtsi @@ -163,6 +163,9 @@ ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x40000>; + ranges = <0 0x00900000 0x40000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6QDL_CLK_OCRAM>; }; From 0c3a0b3d5e9c681b19c2d845714bdd7f906dd5e7 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 26 Aug 2022 07:53:32 +0200 Subject: [PATCH 0342/1477] ARM: dts: imx6dl: add missing properties for sram [ Upstream commit f5848b95633d598bacf0500e0108dc5961af88c0 ] All 3 properties are required by sram.yaml. 
Fixes the dtbs_check warning: sram@900000: '#address-cells' is a required property sram@900000: '#size-cells' is a required property sram@900000: 'ranges' is a required property Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6dl.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/imx6dl.dtsi b/arch/arm/boot/dts/imx6dl.dtsi index fdd81fdc3f35..cd3183c36488 100644 --- a/arch/arm/boot/dts/imx6dl.dtsi +++ b/arch/arm/boot/dts/imx6dl.dtsi @@ -84,6 +84,9 @@ ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x20000>; + ranges = <0 0x00900000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6QDL_CLK_OCRAM>; }; From 2829b6ad30c20c5dc7911c31b23e01ca53cc2056 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 26 Aug 2022 07:53:33 +0200 Subject: [PATCH 0343/1477] ARM: dts: imx6qp: add missing properties for sram [ Upstream commit 088fe5237435ee2f7ed4450519b2ef58b94c832f ] All 3 properties are required by sram.yaml. 
Fixes the dtbs_check warning: sram@940000: '#address-cells' is a required property sram@940000: '#size-cells' is a required property sram@940000: 'ranges' is a required property Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6qp.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi index b310f13a53f2..4d23c92aa8a6 100644 --- a/arch/arm/boot/dts/imx6qp.dtsi +++ b/arch/arm/boot/dts/imx6qp.dtsi @@ -9,12 +9,18 @@ ocram2: sram@940000 { compatible = "mmio-sram"; reg = <0x00940000 0x20000>; + ranges = <0 0x00940000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6QDL_CLK_OCRAM>; }; ocram3: sram@960000 { compatible = "mmio-sram"; reg = <0x00960000 0x20000>; + ranges = <0 0x00960000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6QDL_CLK_OCRAM>; }; From 9735f2b62be928d96414ecadcf79ea45032ef059 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 26 Aug 2022 07:53:34 +0200 Subject: [PATCH 0344/1477] ARM: dts: imx6sl: add missing properties for sram [ Upstream commit 60c9213a1d9941a8b33db570796c3f9be8984974 ] All 3 properties are required by sram.yaml. 
Fixes the dtbs_check warning: sram@900000: '#address-cells' is a required property sram@900000: '#size-cells' is a required property sram@900000: 'ranges' is a required property Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6sl.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi index 91a8c54d5e11..c184a6d5bc42 100644 --- a/arch/arm/boot/dts/imx6sl.dtsi +++ b/arch/arm/boot/dts/imx6sl.dtsi @@ -114,6 +114,9 @@ ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x20000>; + ranges = <0 0x00900000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6SL_CLK_OCRAM>; }; From 9d3ca48722d37243edd8dc892a272cc3540c3d3f Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 26 Aug 2022 07:53:35 +0200 Subject: [PATCH 0345/1477] ARM: dts: imx6sll: add missing properties for sram [ Upstream commit 7492a83ed9b7a151e2dd11d64b06da7a7f0fa7f9 ] All 3 properties are required by sram.yaml. 
Fixes the dtbs_check warning: sram@900000: '#address-cells' is a required property sram@900000: '#size-cells' is a required property sram@900000: 'ranges' is a required property Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6sll.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi index 0b622201a1f3..bf5b262b91f9 100644 --- a/arch/arm/boot/dts/imx6sll.dtsi +++ b/arch/arm/boot/dts/imx6sll.dtsi @@ -115,6 +115,9 @@ ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x20000>; + ranges = <0 0x00900000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; }; intc: interrupt-controller@a01000 { From 017cabfb3f869aed2cb62b9bebf46348ad0326ae Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 26 Aug 2022 07:53:36 +0200 Subject: [PATCH 0346/1477] ARM: dts: imx6sx: add missing properties for sram [ Upstream commit 415432c008b2bce8138841356ba444631cabaa50 ] All 3 properties are required by sram.yaml. 
Fixes the dtbs_check warning: sram@900000: '#address-cells' is a required property sram@900000: '#size-cells' is a required property sram@900000: 'ranges' is a required property Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6sx.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index dfdca1804f9f..c399919943c3 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -161,12 +161,18 @@ ocram_s: sram@8f8000 { compatible = "mmio-sram"; reg = <0x008f8000 0x4000>; + ranges = <0 0x008f8000 0x4000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6SX_CLK_OCRAM_S>; }; ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x20000>; + ranges = <0 0x00900000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6SX_CLK_OCRAM>; }; From 451ce2521c2191054c578ba715da29db5a7720a7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:06:56 +0100 Subject: [PATCH 0347/1477] kselftest/arm64: Fix validatation termination record after EXTRA_CONTEXT [ Upstream commit 5c152c2f66f9368394b89ac90dc7483476ef7b88 ] When arm64 signal context data overflows the base struct sigcontext it gets placed in an extra buffer pointed to by a record of type EXTRA_CONTEXT in the base struct sigcontext which is required to be the last record in the base struct sigframe. The current validation code attempts to check this by using GET_RESV_NEXT_HEAD() to step forward from the current record to the next but that is a macro which assumes it is being provided with a struct _aarch64_ctx and uses the size there to skip forward to the next record. Instead validate_extra_context() passes it a struct extra_context which has a separate size field. 
This compiles but results in us trying to validate a termination record in completely the wrong place, at best failing validation and at worst just segfaulting. Fix this by passing the struct _aarch64_ctx we meant to into the macro. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829160703.874492-4-broonie@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- tools/testing/selftests/arm64/signal/testcases/testcases.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index 61ebcdf63831..a3ac5c2d8aac 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -33,7 +33,7 @@ bool validate_extra_context(struct extra_context *extra, char **err) return false; fprintf(stderr, "Validating EXTRA...\n"); - term = GET_RESV_NEXT_HEAD(extra); + term = GET_RESV_NEXT_HEAD(&extra->head); if (!term || term->magic || term->size) { *err = "Missing terminator after EXTRA context"; return false; From 8f554dd23c18fa4a1ca77c8ce2a49e6fb8d8f9c8 Mon Sep 17 00:00:00 2001 From: Sebastian Krzyszkowiak Date: Fri, 2 Sep 2022 10:42:13 +0200 Subject: [PATCH 0348/1477] arm64: dts: imx8mq-librem5: Add bq25895 as max17055's power supply [ Upstream commit 6effe295e1a87408033c29dbcea9d5a5c8b937d5 ] This allows the userspace to notice that there's not enough current provided to charge the battery, and also fixes issues with 0% SOC values being considered invalid. 
Signed-off-by: Sebastian Krzyszkowiak Signed-off-by: Martin Kepplinger Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi index e3c6d1272198..325ea100969a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi @@ -899,6 +899,7 @@ interrupts = <20 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gauge>; + power-supplies = <&bq25895>; maxim,over-heat-temp = <700>; maxim,over-volt = <4500>; maxim,rsns-microohm = <5000>; From 70f8b48d0b61b2b31f58cc2890ba86fdf89e5c53 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 2 Aug 2022 14:53:03 +0800 Subject: [PATCH 0349/1477] btrfs: scrub: try to fix super block errors [ Upstream commit f9eab5f0bba76742af654f33d517bf62a0db8f12 ] [BUG] The following script shows that, although scrub can detect super block errors, it never tries to fix it: mkfs.btrfs -f -d raid1 -m raid1 $dev1 $dev2 xfs_io -c "pwrite 67108864 4k" $dev2 mount $dev1 $mnt btrfs scrub start -B $dev2 btrfs scrub start -Br $dev2 umount $mnt The first scrub reports the super error correctly: scrub done for f3289218-abd3-41ac-a630-202f766c0859 Scrub started: Tue Aug 2 14:44:11 2022 Status: finished Duration: 0:00:00 Total to scrub: 1.26GiB Rate: 0.00B/s Error summary: super=1 Corrected: 0 Uncorrectable: 0 Unverified: 0 But the second read-only scrub still reports the same super error: Scrub started: Tue Aug 2 14:44:11 2022 Status: finished Duration: 0:00:00 Total to scrub: 1.26GiB Rate: 0.00B/s Error summary: super=1 Corrected: 0 Uncorrectable: 0 Unverified: 0 [CAUSE] The comments already shows that super block can be easily fixed by committing a transaction: /* * If we find an error in a super block, we just report it. 
* They will get written with the next transaction commit * anyway */ But the truth is, such assumption is not always true, and since scrub should try to repair every error it found (except for read-only scrub), we should really actively commit a transaction to fix this. [FIX] Just commit a transaction if we found any super block errors, after everything else is done. We cannot do this just after scrub_supers(), as btrfs_commit_transaction() will try to pause and wait for the running scrub, thus we can not call it with scrub_lock hold. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/scrub.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 0392c556af60..88b9a5394561 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3811,6 +3811,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, int ret; struct btrfs_device *dev; unsigned int nofs_flag; + bool need_commit = false; if (btrfs_fs_closing(fs_info)) return -EAGAIN; @@ -3924,6 +3925,12 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, */ nofs_flag = memalloc_nofs_save(); if (!is_dev_replace) { + u64 old_super_errors; + + spin_lock(&sctx->stat_lock); + old_super_errors = sctx->stat.super_errors; + spin_unlock(&sctx->stat_lock); + btrfs_info(fs_info, "scrub: started on devid %llu", devid); /* * by holding device list mutex, we can @@ -3932,6 +3939,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, mutex_lock(&fs_info->fs_devices->device_list_mutex); ret = scrub_supers(sctx, dev); mutex_unlock(&fs_info->fs_devices->device_list_mutex); + + spin_lock(&sctx->stat_lock); + /* + * Super block errors found, but we can not commit transaction + * at current context, since btrfs_commit_transaction() needs + * to pause the current running scrub (hold by ourselves). 
+ */ + if (sctx->stat.super_errors > old_super_errors && !sctx->readonly) + need_commit = true; + spin_unlock(&sctx->stat_lock); } if (!ret) @@ -3958,6 +3975,25 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, scrub_workers_put(fs_info); scrub_put_ctx(sctx); + /* + * We found some super block errors before, now try to force a + * transaction commit, as scrub has finished. + */ + if (need_commit) { + struct btrfs_trans_handle *trans; + + trans = btrfs_start_transaction(fs_info->tree_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + btrfs_err(fs_info, + "scrub: failed to start transaction to fix super block errors: %d", ret); + return ret; + } + ret = btrfs_commit_transaction(trans); + if (ret < 0) + btrfs_err(fs_info, + "scrub: failed to commit transaction to fix super block errors: %d", ret); + } return ret; out: scrub_workers_put(fs_info); From d9e2585c3bcecb1c83febad31b9f450e93d2509e Mon Sep 17 00:00:00 2001 From: Ian Nam Date: Tue, 10 May 2022 12:31:54 +0530 Subject: [PATCH 0350/1477] clk: zynqmp: Fix stack-out-of-bounds in strncpy` [ Upstream commit dd80fb2dbf1cd8751efbe4e53e54056f56a9b115 ] "BUG: KASAN: stack-out-of-bounds in strncpy+0x30/0x68" Linux-ATF interface is using 16 bytes of SMC payload. In case clock name is longer than 15 bytes, string terminated NULL character will not be received by Linux. Add explicit NULL character at last byte to fix issues when clock name is longer. 
This fixes below bug reported by KASAN: ================================================================== BUG: KASAN: stack-out-of-bounds in strncpy+0x30/0x68 Read of size 1 at addr ffff0008c89a7410 by task swapper/0/1 CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.4.0-00396-g81ef9e7-dirty #3 Hardware name: Xilinx Versal vck190 Eval board revA (QSPI) (DT) Call trace: dump_backtrace+0x0/0x1e8 show_stack+0x14/0x20 dump_stack+0xd4/0x108 print_address_description.isra.0+0xbc/0x37c __kasan_report+0x144/0x198 kasan_report+0xc/0x18 __asan_load1+0x5c/0x68 strncpy+0x30/0x68 zynqmp_clock_probe+0x238/0x7b8 platform_drv_probe+0x6c/0xc8 really_probe+0x14c/0x418 driver_probe_device+0x74/0x130 __device_attach_driver+0xc4/0xe8 bus_for_each_drv+0xec/0x150 __device_attach+0x160/0x1d8 device_initial_probe+0x10/0x18 bus_probe_device+0xe0/0xf0 device_add+0x528/0x950 of_device_add+0x5c/0x80 of_platform_device_create_pdata+0x120/0x168 of_platform_bus_create+0x244/0x4e0 of_platform_populate+0x50/0xe8 zynqmp_firmware_probe+0x370/0x3a8 platform_drv_probe+0x6c/0xc8 really_probe+0x14c/0x418 driver_probe_device+0x74/0x130 device_driver_attach+0x94/0xa0 __driver_attach+0x70/0x108 bus_for_each_dev+0xe4/0x158 driver_attach+0x30/0x40 bus_add_driver+0x21c/0x2b8 driver_register+0xbc/0x1d0 __platform_driver_register+0x7c/0x88 zynqmp_firmware_driver_init+0x1c/0x24 do_one_initcall+0xa4/0x234 kernel_init_freeable+0x1b0/0x24c kernel_init+0x10/0x110 ret_from_fork+0x10/0x18 The buggy address belongs to the page: page:ffff0008f9be1c88 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 raw: 0008d00000000000 ffff0008f9be1c90 ffff0008f9be1c90 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff page dumped because: kasan: bad access detected addr ffff0008c89a7410 is located in stack of task swapper/0/1 at offset 112 in frame: zynqmp_clock_probe+0x0/0x7b8 this frame has 3 objects: [32, 44) 'response' [64, 80) 'ret_payload' [96, 112) 'name' Memory state around the buggy address: 
ffff0008c89a7300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff0008c89a7380: 00 00 00 00 f1 f1 f1 f1 00 04 f2 f2 00 00 f2 f2 >ffff0008c89a7400: 00 00 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 ^ ffff0008c89a7480: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff0008c89a7500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Signed-off-by: Ian Nam Signed-off-by: Shubhrajyoti Datta Link: https://lore.kernel.org/r/20220510070154.29528-3-shubhrajyoti.datta@xilinx.com Acked-by: Michal Simek Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/zynqmp/clkc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/clk/zynqmp/clkc.c b/drivers/clk/zynqmp/clkc.c index db8d0d7161ce..9c82ae240c40 100644 --- a/drivers/clk/zynqmp/clkc.c +++ b/drivers/clk/zynqmp/clkc.c @@ -687,6 +687,13 @@ static void zynqmp_get_clock_info(void) FIELD_PREP(CLK_ATTR_NODE_INDEX, i); zynqmp_pm_clock_get_name(clock[i].clk_id, &name); + + /* + * Terminate with NULL character in case name provided by firmware + * is longer and truncated due to size limit. + */ + name.name[sizeof(name.name) - 1] = '\0'; + if (!strcmp(name.name, RESERVED_CLK_NAME)) continue; strncpy(clock[i].clk_name, name.name, MAX_NAME_LEN); From c2257c8a501537afab276c306cb717b7260276e1 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Thu, 28 Jul 2022 04:23:38 +0200 Subject: [PATCH 0351/1477] media: cx88: Fix a null-ptr-deref bug in buffer_prepare() [ Upstream commit 2b064d91440b33fba5b452f2d1b31f13ae911d71 ] When the driver calls cx88_risc_buffer() to prepare the buffer, the function call may fail, resulting in a empty buffer and null-ptr-deref later in buffer_queue(). 
The following log can reveal it: [ 41.822762] general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI [ 41.824488] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 41.828027] RIP: 0010:buffer_queue+0xc2/0x500 [ 41.836311] Call Trace: [ 41.836945] __enqueue_in_driver+0x141/0x360 [ 41.837262] vb2_start_streaming+0x62/0x4a0 [ 41.838216] vb2_core_streamon+0x1da/0x2c0 [ 41.838516] __vb2_init_fileio+0x981/0xbc0 [ 41.839141] __vb2_perform_fileio+0xbf9/0x1120 [ 41.840072] vb2_fop_read+0x20e/0x400 [ 41.840346] v4l2_read+0x215/0x290 [ 41.840603] vfs_read+0x162/0x4c0 Fix this by checking the return value of cx88_risc_buffer() [hverkuil: fix coding style issues] Signed-off-by: Zheyu Ma Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/cx88/cx88-vbi.c | 9 +++--- drivers/media/pci/cx88/cx88-video.c | 43 +++++++++++++++-------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/media/pci/cx88/cx88-vbi.c b/drivers/media/pci/cx88/cx88-vbi.c index 58489ea0c1da..7cf2271866d0 100644 --- a/drivers/media/pci/cx88/cx88-vbi.c +++ b/drivers/media/pci/cx88/cx88-vbi.c @@ -144,11 +144,10 @@ static int buffer_prepare(struct vb2_buffer *vb) return -EINVAL; vb2_set_plane_payload(vb, 0, size); - cx88_risc_buffer(dev->pci, &buf->risc, sgt->sgl, - 0, VBI_LINE_LENGTH * lines, - VBI_LINE_LENGTH, 0, - lines); - return 0; + return cx88_risc_buffer(dev->pci, &buf->risc, sgt->sgl, + 0, VBI_LINE_LENGTH * lines, + VBI_LINE_LENGTH, 0, + lines); } static void buffer_finish(struct vb2_buffer *vb) diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c index 8cffdacf6007..e5adffa3a99a 100644 --- a/drivers/media/pci/cx88/cx88-video.c +++ b/drivers/media/pci/cx88/cx88-video.c @@ -431,6 +431,7 @@ static int queue_setup(struct vb2_queue *q, static int buffer_prepare(struct vb2_buffer *vb) { + int ret; struct 
vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct cx8800_dev *dev = vb->vb2_queue->drv_priv; struct cx88_core *core = dev->core; @@ -445,35 +446,35 @@ static int buffer_prepare(struct vb2_buffer *vb) switch (core->field) { case V4L2_FIELD_TOP: - cx88_risc_buffer(dev->pci, &buf->risc, - sgt->sgl, 0, UNSET, - buf->bpl, 0, core->height); + ret = cx88_risc_buffer(dev->pci, &buf->risc, + sgt->sgl, 0, UNSET, + buf->bpl, 0, core->height); break; case V4L2_FIELD_BOTTOM: - cx88_risc_buffer(dev->pci, &buf->risc, - sgt->sgl, UNSET, 0, - buf->bpl, 0, core->height); + ret = cx88_risc_buffer(dev->pci, &buf->risc, + sgt->sgl, UNSET, 0, + buf->bpl, 0, core->height); break; case V4L2_FIELD_SEQ_TB: - cx88_risc_buffer(dev->pci, &buf->risc, - sgt->sgl, - 0, buf->bpl * (core->height >> 1), - buf->bpl, 0, - core->height >> 1); + ret = cx88_risc_buffer(dev->pci, &buf->risc, + sgt->sgl, + 0, buf->bpl * (core->height >> 1), + buf->bpl, 0, + core->height >> 1); break; case V4L2_FIELD_SEQ_BT: - cx88_risc_buffer(dev->pci, &buf->risc, - sgt->sgl, - buf->bpl * (core->height >> 1), 0, - buf->bpl, 0, - core->height >> 1); + ret = cx88_risc_buffer(dev->pci, &buf->risc, + sgt->sgl, + buf->bpl * (core->height >> 1), 0, + buf->bpl, 0, + core->height >> 1); break; case V4L2_FIELD_INTERLACED: default: - cx88_risc_buffer(dev->pci, &buf->risc, - sgt->sgl, 0, buf->bpl, - buf->bpl, buf->bpl, - core->height >> 1); + ret = cx88_risc_buffer(dev->pci, &buf->risc, + sgt->sgl, 0, buf->bpl, + buf->bpl, buf->bpl, + core->height >> 1); break; } dprintk(2, @@ -481,7 +482,7 @@ static int buffer_prepare(struct vb2_buffer *vb) buf, buf->vb.vb2_buf.index, __func__, core->width, core->height, dev->fmt->depth, dev->fmt->fourcc, (unsigned long)buf->risc.dma); - return 0; + return ret; } static void buffer_finish(struct vb2_buffer *vb) From 12d31182de8deb7a8e0be6246d661291773427c8 Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Fri, 26 Aug 2022 22:20:30 +0800 Subject: [PATCH 0352/1477] clk: zynqmp: pll: rectify rate 
rounding in zynqmp_pll_round_rate [ Upstream commit 30eaf02149ecc3c5815e45d27187bf09e925071d ] The function zynqmp_pll_round_rate is used to find the most appropriate PLL frequency which the hardware can generate according to the desired frequency. For example, if the desired frequency is 297MHz, considering the limited range from PS_PLL_VCO_MIN (1.5GHz) to PS_PLL_VCO_MAX (3.0GHz) of PLL, zynqmp_pll_round_rate should return 1.872GHz (297MHz * 5). There are two problems with the current code of zynqmp_pll_round_rate: 1) When the rate is below PS_PLL_VCO_MIN, it can't find a correct rate when the parameter "rate" is an integer multiple of *prate, in other words, if "f" is zero, zynqmp_pll_round_rate won't return a valid frequency which is from PS_PLL_VCO_MIN to PS_PLL_VCO_MAX. For example, *prate is 33MHz and the rate is 660MHz, zynqmp_pll_round_rate will not boost up rate and just return 660MHz, and this will cause clk_calc_new_rates failure since zynqmp_pll_round_rate returns an invalid rate out of its boundaries. 2) Even if the rate is higher than PS_PLL_VCO_MIN, there is still a risk that zynqmp_pll_round_rate returns an invalid rate because the function DIV_ROUND_CLOSEST makes some loss in the fractional part. If the parent clock *prate is 33333333Hz and we want to set the PLL rate to 1.5GHz, this function will return 1499999985Hz by using the formula below: value = *prate * DIV_ROUND_CLOSEST(rate, *prate). This value is also invalid since it's slightly smaller than PS_PLL_VCO_MIN, because DIV_ROUND_CLOSEST makes some loss in the fractional part. 
Signed-off-by: Quanyang Wang Link: https://lore.kernel.org/r/20220826142030.213805-1-quanyang.wang@windriver.com Reviewed-by: Shubhrajyoti Datta Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/zynqmp/pll.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/clk/zynqmp/pll.c b/drivers/clk/zynqmp/pll.c index abe6afbf3407..2ae7f9129b07 100644 --- a/drivers/clk/zynqmp/pll.c +++ b/drivers/clk/zynqmp/pll.c @@ -99,26 +99,25 @@ static long zynqmp_pll_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate) { u32 fbdiv; - long rate_div, f; + u32 mult, div; - /* Enable the fractional mode if needed */ - rate_div = (rate * FRAC_DIV) / *prate; - f = rate_div % FRAC_DIV; - if (f) { - if (rate > PS_PLL_VCO_MAX) { - fbdiv = rate / PS_PLL_VCO_MAX; - rate = rate / (fbdiv + 1); - } - if (rate < PS_PLL_VCO_MIN) { - fbdiv = DIV_ROUND_UP(PS_PLL_VCO_MIN, rate); - rate = rate * fbdiv; - } - return rate; + /* Let rate fall inside the range PS_PLL_VCO_MIN ~ PS_PLL_VCO_MAX */ + if (rate > PS_PLL_VCO_MAX) { + div = DIV_ROUND_UP(rate, PS_PLL_VCO_MAX); + rate = rate / div; + } + if (rate < PS_PLL_VCO_MIN) { + mult = DIV_ROUND_UP(PS_PLL_VCO_MIN, rate); + rate = rate * mult; } fbdiv = DIV_ROUND_CLOSEST(rate, *prate); - fbdiv = clamp_t(u32, fbdiv, PLL_FBDIV_MIN, PLL_FBDIV_MAX); - return *prate * fbdiv; + if (fbdiv < PLL_FBDIV_MIN || fbdiv > PLL_FBDIV_MAX) { + fbdiv = clamp_t(u32, fbdiv, PLL_FBDIV_MIN, PLL_FBDIV_MAX); + rate = *prate * fbdiv; + } + + return rate; } /** From c13d0d2f5a4806cef87693966091b5fd3f5c225f Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 10 Aug 2022 15:27:34 -0700 Subject: [PATCH 0353/1477] usb: host: xhci-plat: suspend and resume clocks [ Upstream commit 8bd954c56197caf5e3a804d989094bc3fe6329aa ] Introduce XHCI_SUSPEND_RESUME_CLKS quirk as a means to suspend and resume clocks if the hardware is capable of doing so. We assume that clocks will be needed if the device may wake. 
Reviewed-by: Florian Fainelli Signed-off-by: Justin Chen Link: https://lore.kernel.org/r/1660170455-15781-2-git-send-email-justinpopo6@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-plat.c | 16 +++++++++++++++- drivers/usb/host/xhci.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index dc570ce4e831..2687662f26b6 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -447,7 +447,16 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev) * xhci_suspend() needs `do_wakeup` to know whether host is allowed * to do wakeup during suspend. */ - return xhci_suspend(xhci, device_may_wakeup(dev)); + ret = xhci_suspend(xhci, device_may_wakeup(dev)); + if (ret) + return ret; + + if (!device_may_wakeup(dev) && (xhci->quirks & XHCI_SUSPEND_RESUME_CLKS)) { + clk_disable_unprepare(xhci->clk); + clk_disable_unprepare(xhci->reg_clk); + } + + return 0; } static int __maybe_unused xhci_plat_resume(struct device *dev) @@ -456,6 +465,11 @@ static int __maybe_unused xhci_plat_resume(struct device *dev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); int ret; + if (!device_may_wakeup(dev) && (xhci->quirks & XHCI_SUSPEND_RESUME_CLKS)) { + clk_prepare_enable(xhci->clk); + clk_prepare_enable(xhci->reg_clk); + } + ret = xhci_priv_resume_quirk(hcd); if (ret) return ret; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 6f16a05b1958..e668740000b2 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1888,6 +1888,7 @@ struct xhci_hcd { #define XHCI_SG_TRB_CACHE_SIZE_QUIRK BIT_ULL(39) #define XHCI_NO_SOFT_RETRY BIT_ULL(40) #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(42) +#define XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43) unsigned int num_active_eps; unsigned int limit_active_eps; From 48727117bd6243f1c7fc6ed35ad131d27c505be8 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 10 Aug 2022 15:27:35 -0700 Subject: 
[PATCH 0354/1477] usb: host: xhci-plat: suspend/resume clks for brcm [ Upstream commit c69400b09e471a3f1167adead55a808f0da6534a ] The xhci_plat_brcm xhci block can enter suspend with clock disabled to save power and re-enable them on resume. Make use of the XHCI_SUSPEND_RESUME_CLKS quirk to do so. Reviewed-by: Florian Fainelli Signed-off-by: Justin Chen Link: https://lore.kernel.org/r/1660170455-15781-3-git-send-email-justinpopo6@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-plat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 2687662f26b6..972a44b2a7f1 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -134,7 +134,7 @@ static const struct xhci_plat_priv xhci_plat_renesas_rcar_gen3 = { }; static const struct xhci_plat_priv xhci_plat_brcm = { - .quirks = XHCI_RESET_ON_RESUME, + .quirks = XHCI_RESET_ON_RESUME | XHCI_SUSPEND_RESUME_CLKS, }; static const struct of_device_id usb_xhci_of_match[] = { From 5942e5c63dc99b51fd5cbda8aa30099e4d0b4a35 Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Mon, 29 Aug 2022 19:01:15 +0800 Subject: [PATCH 0355/1477] scsi: 3w-9xxx: Avoid disabling device if failing to enable it [ Upstream commit 7eff437b5ee1309b34667844361c6bbb5c97df05 ] The original code will "goto out_disable_device" and call pci_disable_device() if pci_enable_device() fails. The kernel will generate a warning message like "3w-9xxx 0000:00:05.0: disabling already-disabled device". We shouldn't disable a device that failed to be enabled. A simple return is fine. Link: https://lore.kernel.org/r/20220829110115.38789-1-fantasquex@gmail.com Reported-by: Zheyu Ma Signed-off-by: Letu Ren Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/3w-9xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 3337b1e80412..f6f92033132a 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -2014,7 +2014,7 @@ static int twa_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) retval = pci_enable_device(pdev); if (retval) { TW_PRINTK(host, TW_DRIVER, 0x34, "Failed to enable pci device"); - goto out_disable_device; + return -ENODEV; } pci_set_master(pdev); From b2700f98b3f4dd19fb4315b70581e5caff89eb49 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Thu, 8 Sep 2022 01:35:02 +0900 Subject: [PATCH 0356/1477] nbd: Fix hung when signal interrupts nbd_start_device_ioctl() [ Upstream commit 1de7c3cf48fc41cd95adb12bd1ea9033a917798a ] syzbot reported hung task [1]. The following program is a simplified version of the reproducer: int main(void) { int sv[2], fd; if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) return 1; if ((fd = open("/dev/nbd0", 0)) < 0) return 1; if (ioctl(fd, NBD_SET_SIZE_BLOCKS, 0x81) < 0) return 1; if (ioctl(fd, NBD_SET_SOCK, sv[0]) < 0) return 1; if (ioctl(fd, NBD_DO_IT) < 0) return 1; return 0; } When signal interrupt nbd_start_device_ioctl() waiting the condition atomic_read(&config->recv_threads) == 0, the task can hung because it waits the completion of the inflight IOs. This patch fixes the issue by clearing queue, not just shutdown, when signal interrupt nbd_start_device_ioctl(). 
Link: https://syzkaller.appspot.com/bug?id=7d89a3ffacd2b83fdd39549bc4d8e0a89ef21239 [1] Reported-by: syzbot+38e6c55d4969a14c1534@syzkaller.appspotmail.com Signed-off-by: Shigeru Yoshida Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20220907163502.577561-1-syoshida@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4a6b82d434ee..b0d3dadeb964 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1342,10 +1342,12 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b mutex_unlock(&nbd->config_lock); ret = wait_event_interruptible(config->recv_wq, atomic_read(&config->recv_threads) == 0); - if (ret) + if (ret) { sock_shutdown(nbd); - flush_workqueue(nbd->recv_workq); + nbd_clear_que(nbd); + } + flush_workqueue(nbd->recv_workq); mutex_lock(&nbd->config_lock); nbd_bdev_reset(bdev); /* user requested, ignore socket errors */ From 89f305a71418591cdda18180f712f91c9820f03b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 27 Aug 2022 07:32:23 +0000 Subject: [PATCH 0357/1477] power: supply: adp5061: fix out-of-bounds read in adp5061_get_chg_type() [ Upstream commit 9d47e01b9d807808224347935562f7043a358054 ] ADP5061_CHG_STATUS_1_CHG_STATUS is masked with 0x07, which means a length of 8, but adp5061_chg_type array size is 4, may end up reading 4 elements beyond the end of the adp5061_chg_type[] array. 
Signed-off-by: Wei Yongjun Acked-by: Michael Hennerich Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/adp5061.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/adp5061.c b/drivers/power/supply/adp5061.c index 003557043ab3..daee1161c305 100644 --- a/drivers/power/supply/adp5061.c +++ b/drivers/power/supply/adp5061.c @@ -427,11 +427,11 @@ static int adp5061_get_chg_type(struct adp5061_state *st, if (ret < 0) return ret; - chg_type = adp5061_chg_type[ADP5061_CHG_STATUS_1_CHG_STATUS(status1)]; - if (chg_type > ADP5061_CHG_FAST_CV) + chg_type = ADP5061_CHG_STATUS_1_CHG_STATUS(status1); + if (chg_type >= ARRAY_SIZE(adp5061_chg_type)) val->intval = POWER_SUPPLY_STATUS_UNKNOWN; else - val->intval = chg_type; + val->intval = adp5061_chg_type[chg_type]; return ret; } From 1b3cebeca99e8e0aa4fa57faac8dbf41e967317a Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 9 Sep 2022 16:13:39 +0200 Subject: [PATCH 0358/1477] staging: vt6655: fix potential memory leak [ Upstream commit c8ff91535880d41b49699b3829fb6151942de29e ] In function device_init_td0_ring, memory is allocated for member td_info of priv->apTD0Rings[i], with i increasing from 0. In case of allocation failure, the memory is freed in reversed order, with i decreasing to 0. However, the case i=0 is left out and thus memory is leaked. Modify the memory freeing loop to include the case i=0. 
Tested-by: Philipp Hortmann Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/20220909141338.19343-1-namcaov@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/vt6655/device_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 0dd70173a754..343f0de03154 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -675,7 +675,7 @@ static int device_init_td0_ring(struct vnt_private *priv) return 0; err_free_desc: - while (--i) { + while (i--) { desc = &priv->apTD0Rings[i]; kfree(desc->td_info); } From 19c010ae44f0ce52b5436080492a61a092ee0cf4 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 29 Aug 2022 10:22:38 +0800 Subject: [PATCH 0359/1477] blk-throttle: prevent overflow while calculating wait time [ Upstream commit 8d6bbaada2e0a65f9012ac4c2506460160e7237a ] There is a problem found by code review in tg_with_in_bps_limit() that 'bps_limit * jiffy_elapsed_rnd' might overflow. Fix the problem by calling mul_u64_u64_div_u64() instead. 
Signed-off-by: Yu Kuai Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20220829022240.3348319-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-throttle.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index c53a254171a2..c526fdd0a7b9 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -944,7 +944,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, u64 bps_limit, unsigned long *wait) { bool rw = bio_data_dir(bio); - u64 bytes_allowed, extra_bytes, tmp; + u64 bytes_allowed, extra_bytes; unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; unsigned int bio_size = throtl_bio_data_size(bio); @@ -961,10 +961,8 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, jiffy_elapsed_rnd = tg->td->throtl_slice; jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice); - - tmp = bps_limit * jiffy_elapsed_rnd; - do_div(tmp, HZ); - bytes_allowed = tmp; + bytes_allowed = mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed_rnd, + (u64)HZ); if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) { if (wait) From fcad2ac86399e7cc2df3170b4464821f1247c792 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 9 Sep 2022 22:36:06 +0300 Subject: [PATCH 0360/1477] ata: libahci_platform: Sanity check the DT child nodes number [ Upstream commit 3c132ea6508b34956e5ed88d04936983ec230601 ] Having greater than AHCI_MAX_PORTS (32) ports detected isn't that critical from the further AHCI-platform initialization point of view since exceeding the ports upper limit will cause allocating more resources than will be used afterwards. But detecting too many child DT-nodes doesn't seem right since it's very unlikely to have it on an ordinary platform. 
In accordance with the AHCI specification there can't be more than 32 ports implemented at least due to having the CAP.NP field of 5 bits wide and the PI register of dword size. Thus if such situation is found the DTB must have been corrupted and the data read from it shouldn't be reliable. Let's consider that as an erroneous situation and halt further resources allocation. Note it's logically more correct to have the nports set only after the initialization value is checked for being sane. So while at it let's make sure nports is assigned with a correct value. Signed-off-by: Serge Semin Reviewed-by: Hannes Reinecke Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/libahci_platform.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 0910441321f7..64d6da0a5303 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -451,14 +451,24 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, } } - hpriv->nports = child_nodes = of_get_child_count(dev->of_node); + /* + * Too many sub-nodes most likely means having something wrong with + * the firmware. + */ + child_nodes = of_get_child_count(dev->of_node); + if (child_nodes > AHCI_MAX_PORTS) { + rc = -EINVAL; + goto err_out; + } /* * If no sub-node was found, we still need to set nports to * one in order to be able to use the * ahci_platform_[en|dis]able_[phys|regulators] functions. 
*/ - if (!child_nodes) + if (child_nodes) + hpriv->nports = child_nodes; + else hpriv->nports = 1; hpriv->phys = devm_kcalloc(dev, hpriv->nports, sizeof(*hpriv->phys), GFP_KERNEL); From c263516c2c20df9c29f33baeb4a817af7212fb69 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Tue, 20 Sep 2022 00:16:47 +0800 Subject: [PATCH 0361/1477] bcache: fix set_at_max_writeback_rate() for multiple attached devices [ Upstream commit d2d05b88035d2d51a5bb6c5afec88a0880c73df4 ] Inside set_at_max_writeback_rate() the calculation in following if() check is wrong, if (atomic_inc_return(&c->idle_counter) < atomic_read(&c->attached_dev_nr) * 6) Because each attached backing device has its own writeback thread running and increasing c->idle_counter, the counter increates much faster than expected. The correct calculation should be, (counter / dev_nr) < dev_nr * 6 which equals to, counter < dev_nr * dev_nr * 6 This patch fixes the above mistake with correct calculation, and helper routine idle_counter_exceeded() is added to make code be more clear. Reported-by: Mingzhe Zou Signed-off-by: Coly Li Acked-by: Mingzhe Zou Link: https://lore.kernel.org/r/20220919161647.81238-6-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/writeback.c | 73 +++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index a878b959fbcd..3aa73da2c67b 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -119,6 +119,53 @@ static void __update_writeback_rate(struct cached_dev *dc) dc->writeback_rate_target = target; } +static bool idle_counter_exceeded(struct cache_set *c) +{ + int counter, dev_nr; + + /* + * If c->idle_counter is overflow (idel for really long time), + * reset as 0 and not set maximum rate this time for code + * simplicity. 
+ */ + counter = atomic_inc_return(&c->idle_counter); + if (counter <= 0) { + atomic_set(&c->idle_counter, 0); + return false; + } + + dev_nr = atomic_read(&c->attached_dev_nr); + if (dev_nr == 0) + return false; + + /* + * c->idle_counter is increased by writeback thread of all + * attached backing devices, in order to represent a rough + * time period, counter should be divided by dev_nr. + * Otherwise the idle time cannot be larger with more backing + * device attached. + * The following calculation equals to checking + * (counter / dev_nr) < (dev_nr * 6) + */ + if (counter < (dev_nr * dev_nr * 6)) + return false; + + return true; +} + +/* + * Idle_counter is increased every time when update_writeback_rate() is + * called. If all backing devices attached to the same cache set have + * identical dc->writeback_rate_update_seconds values, it is about 6 + * rounds of update_writeback_rate() on each backing device before + * c->at_max_writeback_rate is set to 1, and then max wrteback rate set + * to each dc->writeback_rate.rate. + * In order to avoid extra locking cost for counting exact dirty cached + * devices number, c->attached_dev_nr is used to calculate the idle + * throushold. It might be bigger if not all cached device are in write- + * back mode, but it still works well with limited extra rounds of + * update_writeback_rate(). + */ static bool set_at_max_writeback_rate(struct cache_set *c, struct cached_dev *dc) { @@ -129,21 +176,8 @@ static bool set_at_max_writeback_rate(struct cache_set *c, /* Don't set max writeback rate if gc is running */ if (!c->gc_mark_valid) return false; - /* - * Idle_counter is increased everytime when update_writeback_rate() is - * called. 
If all backing devices attached to the same cache set have - * identical dc->writeback_rate_update_seconds values, it is about 6 - * rounds of update_writeback_rate() on each backing device before - * c->at_max_writeback_rate is set to 1, and then max wrteback rate set - * to each dc->writeback_rate.rate. - * In order to avoid extra locking cost for counting exact dirty cached - * devices number, c->attached_dev_nr is used to calculate the idle - * throushold. It might be bigger if not all cached device are in write- - * back mode, but it still works well with limited extra rounds of - * update_writeback_rate(). - */ - if (atomic_inc_return(&c->idle_counter) < - atomic_read(&c->attached_dev_nr) * 6) + + if (!idle_counter_exceeded(c)) return false; if (atomic_read(&c->at_max_writeback_rate) != 1) @@ -157,13 +191,10 @@ static bool set_at_max_writeback_rate(struct cache_set *c, dc->writeback_rate_change = 0; /* - * Check c->idle_counter and c->at_max_writeback_rate agagain in case - * new I/O arrives during before set_at_max_writeback_rate() returns. - * Then the writeback rate is set to 1, and its new value should be - * decided via __update_writeback_rate(). + * In case new I/O arrives during before + * set_at_max_writeback_rate() returns. */ - if ((atomic_read(&c->idle_counter) < - atomic_read(&c->attached_dev_nr) * 6) || + if (!idle_counter_exceeded(c) || !atomic_read(&c->at_max_writeback_rate)) return false; From f04a673d4a27438d19b08a4d2842c5426b1cbeb5 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 16 Sep 2022 11:35:05 +0100 Subject: [PATCH 0362/1477] soundwire: cadence: Don't overwrite msg->buf during write commands [ Upstream commit ba05b39d265bdd16913f7684600d9d41e2796745 ] The buf passed in struct sdw_msg must only be written for a READ, in that case the RDATA part of the response is the data value of the register. For a write command there is no RDATA, and buf should be assumed to be const and unmodifable. 
The original caller should not expect its data buffer to be corrupted by an sdw_nwrite(). Signed-off-by: Richard Fitzgerald Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220916103505.1562210-1-rf@opensource.cirrus.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/soundwire/cadence_master.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index c6d421a4b91b..a3247692ddc0 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -501,9 +501,12 @@ cdns_fill_msg_resp(struct sdw_cdns *cdns, return SDW_CMD_IGNORED; } - /* fill response */ - for (i = 0; i < count; i++) - msg->buf[i + offset] = FIELD_GET(CDNS_MCP_RESP_RDATA, cdns->response_buf[i]); + if (msg->flags == SDW_MSG_FLAG_READ) { + /* fill response */ + for (i = 0; i < count; i++) + msg->buf[i + offset] = FIELD_GET(CDNS_MCP_RESP_RDATA, + cdns->response_buf[i]); + } return SDW_CMD_OK; } From f00c049ede46226ee4ececbbcb547abe35b1a45b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 20 Sep 2022 01:57:11 +0800 Subject: [PATCH 0363/1477] soundwire: intel: fix error handling on dai registration issues [ Upstream commit c6867cda906aadbce5e71efde9c78a26108b2bad ] The call to intel_register_dai() may fail because of memory allocation issues or problems reported by the ASoC core. In all cases, when a error is thrown the component is not registered, it's invalid to unregister it. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20220919175721.354679-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/soundwire/intel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 824d9f900aca..942d2fe13218 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1470,7 +1470,6 @@ int intel_master_startup(struct platform_device *pdev) ret = intel_register_dai(sdw); if (ret) { dev_err(dev, "DAI registration failed: %d\n", ret); - snd_soc_unregister_component(dev); goto err_interrupt; } From dbcca76435a606a352c794956e6df62eedd3a353 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sun, 4 Sep 2022 12:31:15 -0700 Subject: [PATCH 0364/1477] HID: roccat: Fix use-after-free in roccat_read() [ Upstream commit cacdb14b1c8d3804a3a7d31773bc7569837b71a4 ] roccat_report_event() is responsible for registering roccat-related reports in struct roccat_device. int roccat_report_event(int minor, u8 const *data) { struct roccat_device *device; struct roccat_reader *reader; struct roccat_report *report; uint8_t *new_value; device = devices[minor]; new_value = kmemdup(data, device->report_size, GFP_ATOMIC); if (!new_value) return -ENOMEM; report = &device->cbuf[device->cbuf_end]; /* passing NULL is safe */ kfree(report->value); ... The registered report is stored in the struct roccat_device member "struct roccat_report cbuf[ROCCAT_CBUF_SIZE];". If more reports are received than the "ROCCAT_CBUF_SIZE" value, roccat_report_event() kfree()s the saved report from cbuf[0] and allocates a new report. Since there is no lock when this kfree() is performed, kfree() can be performed even while reading the saved report.
static ssize_t roccat_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct roccat_reader *reader = file->private_data; struct roccat_device *device = reader->device; struct roccat_report *report; ssize_t retval = 0, len; DECLARE_WAITQUEUE(wait, current); mutex_lock(&device->cbuf_lock); ... report = &device->cbuf[reader->cbuf_start]; /* * If report is larger than requested amount of data, rest of report * is lost! */ len = device->report_size > count ? count : device->report_size; if (copy_to_user(buffer, report->value, len)) { retval = -EFAULT; goto exit_unlock; } ... The roccat_read() function receives the device->cbuf report and delivers it to the user through copy_to_user(). If the N+ROCCAT_CBUF_SIZE th report is received while copying of the Nth report->value is in progress, the pointer that copy_to_user() is working on is kfree()ed and UAF read may occur. (race condition) Since the device node of this driver does not set separate permissions, this is not a security vulnerability, but because it is used for requesting screen display of profile or dpi settings, a user using the roccat device can apply udev to this device node or There is a possibility to use it by giving. 
Signed-off-by: Hyunwoo Kim Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-roccat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c index 26373b82fe81..6da80e442fdd 100644 --- a/drivers/hid/hid-roccat.c +++ b/drivers/hid/hid-roccat.c @@ -257,6 +257,8 @@ int roccat_report_event(int minor, u8 const *data) if (!new_value) return -ENOMEM; + mutex_lock(&device->cbuf_lock); + report = &device->cbuf[device->cbuf_end]; /* passing NULL is safe */ @@ -276,6 +278,8 @@ int roccat_report_event(int minor, u8 const *data) reader->cbuf_start = (reader->cbuf_start + 1) % ROCCAT_CBUF_SIZE; } + mutex_unlock(&device->cbuf_lock); + wake_up_interruptible(&device->wait); return 0; } From 782b3e71c957991ac8ae53318bc369049d49bb53 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 21 Sep 2022 10:28:37 -0600 Subject: [PATCH 0365/1477] md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d [ Upstream commit 5e2cf333b7bd5d3e62595a44d598a254c697cd74 ] A complicated deadlock exists when using the journal and an elevated group_thread_cnt. It was found with loop devices, but it's not clear whether it can be seen with real disks. The deadlock can occur simply by writing data with an fio script.
When the deadlock occurs, multiple threads will hang in different ways: 1) The group threads will hang in the blk-wbt code with bios waiting to be submitted to the block layer: io_schedule+0x70/0xb0 rq_qos_wait+0x153/0x210 wbt_wait+0x115/0x1b0 io_schedule+0x70/0xb0 rq_qos_wait+0x153/0x210 wbt_wait+0x115/0x1b0 __rq_qos_throttle+0x38/0x60 blk_mq_submit_bio+0x589/0xcd0 wbt_wait+0x115/0x1b0 __rq_qos_throttle+0x38/0x60 blk_mq_submit_bio+0x589/0xcd0 __submit_bio+0xe6/0x100 submit_bio_noacct_nocheck+0x42e/0x470 submit_bio_noacct+0x4c2/0xbb0 ops_run_io+0x46b/0x1a30 handle_stripe+0xcd3/0x36b0 handle_active_stripes.constprop.0+0x6f6/0xa60 raid5_do_work+0x177/0x330 Or: io_schedule+0x70/0xb0 rq_qos_wait+0x153/0x210 wbt_wait+0x115/0x1b0 __rq_qos_throttle+0x38/0x60 blk_mq_submit_bio+0x589/0xcd0 __submit_bio+0xe6/0x100 submit_bio_noacct_nocheck+0x42e/0x470 submit_bio_noacct+0x4c2/0xbb0 flush_deferred_bios+0x136/0x170 raid5_do_work+0x262/0x330 2) The r5l_reclaim thread will hang in the same way, submitting a bio to the block layer: io_schedule+0x70/0xb0 rq_qos_wait+0x153/0x210 wbt_wait+0x115/0x1b0 __rq_qos_throttle+0x38/0x60 blk_mq_submit_bio+0x589/0xcd0 __submit_bio+0xe6/0x100 submit_bio_noacct_nocheck+0x42e/0x470 submit_bio_noacct+0x4c2/0xbb0 submit_bio+0x3f/0xf0 md_super_write+0x12f/0x1b0 md_update_sb.part.0+0x7c6/0xff0 md_update_sb+0x30/0x60 r5l_do_reclaim+0x4f9/0x5e0 r5l_reclaim_thread+0x69/0x30b However, before hanging, the MD_SB_CHANGE_PENDING flag will be set for sb_flags in r5l_write_super_and_discard_space(). This flag will never be cleared because the submit_bio() call never returns. 3) Due to the MD_SB_CHANGE_PENDING flag being set, handle_stripe() will do no processing on any pending stripes and re-set STRIPE_HANDLE. This will cause the raid5d thread to enter an infinite loop, constantly trying to handle the same stripes stuck in the queue. 
The raid5d thread has a blk_plug that holds a number of bios that are also stuck waiting seeing the thread is in a loop that never schedules. These bios have been accounted for by blk-wbt thus preventing the other threads above from continuing when they try to submit bios. --Deadlock. To fix this, add the same wait_event() that is used in raid5_do_work() to raid5d() such that if MD_SB_CHANGE_PENDING is set, the thread will schedule and wait until the flag is cleared. The schedule action will flush the plug which will allow the r5l_reclaim thread to continue, thus preventing the deadlock. However, md_check_recovery() calls can also clear MD_SB_CHANGE_PENDING from the same thread and can thus deadlock if the thread is put to sleep. So avoid waiting if md_check_recovery() is being called in the loop. It's not clear when the deadlock was introduced, but the similar wait_event() call in raid5_do_work() was added in 2017 by this commit: 16d997b78b15 ("md/raid5: simplfy delaying of writes while metadata is updated.") Link: https://lore.kernel.org/r/7f3b87b6-b52a-f737-51d7-a4eec5c44112@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/raid5.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 00995e60d46b..9f114b9d8dc6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -36,6 +36,7 @@ */ #include +#include #include #include #include @@ -6519,7 +6520,18 @@ static void raid5d(struct md_thread *thread) spin_unlock_irq(&conf->device_lock); md_check_recovery(mddev); spin_lock_irq(&conf->device_lock); + + /* + * Waiting on MD_SB_CHANGE_PENDING below may deadlock + * seeing md_check_recovery() is needed to clear + * the flag when using mdmon. 
+ */ + continue; } + + wait_event_lock_irq(mddev->sb_wait, + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), + conf->device_lock); } pr_debug("%d stripes handled\n", handled); From 91271a3e772e180bbb8afb114c72fd294a02f93d Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Wed, 21 Sep 2022 15:34:45 +0300 Subject: [PATCH 0366/1477] usb: host: xhci: Fix potential memory leak in xhci_alloc_stream_info() [ Upstream commit 7e271f42a5cc3768cd2622b929ba66859ae21f97 ] xhci_alloc_stream_info() allocates stream context array for stream_info ->stream_ctx_array with xhci_alloc_stream_ctx(). When some error occurs, stream_info->stream_ctx_array is not released, which will lead to a memory leak. We can fix it by releasing the stream_info->stream_ctx_array with xhci_free_stream_ctx() on the error path to avoid the potential memory leak. Signed-off-by: Jianglei Nie Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220921123450.671459-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-mem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 024e8911df34..1fba5605a88e 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -659,7 +659,7 @@ struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci, num_stream_ctxs, &stream_info->ctx_array_dma, mem_flags); if (!stream_info->stream_ctx_array) - goto cleanup_ctx; + goto cleanup_ring_array; memset(stream_info->stream_ctx_array, 0, sizeof(struct xhci_stream_ctx)*num_stream_ctxs); @@ -720,6 +720,11 @@ cleanup_rings: } xhci_free_command(xhci, stream_info->free_streams_command); cleanup_ctx: + xhci_free_stream_ctx(xhci, + stream_info->num_stream_ctxs, + stream_info->stream_ctx_array, + stream_info->ctx_array_dma); +cleanup_ring_array: kfree(stream_info->stream_rings); cleanup_info: kfree(stream_info); From 
acf0006f2b2b2ca672988875fd154429aafb2a9b Mon Sep 17 00:00:00 2001 From: Robin Guo Date: Tue, 6 Sep 2022 10:21:19 +0800 Subject: [PATCH 0367/1477] usb: musb: Fix musb_gadget.c rxstate overflow bug [ Upstream commit eea4c860c3b366369eff0489d94ee4f0571d467d ] The usb function device call musb_gadget_queue() adds the passed request to musb_ep::req_list,If the (request->length > musb_ep->packet_sz) and (is_buffer_mapped(req) return false),the rxstate() will copy all data in fifo to request->buf which may cause request->buf out of bounds. Fix it by add the length check : fifocnt = min_t(unsigned, request->length - request->actual, fifocnt); Signed-off-by: Robin Guo Link: https://lore.kernel.org/r/20220906102119.1b071d07a8391ff115e6d1ef@inspur.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/musb/musb_gadget.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index fb806b33178a..c273eee35aaa 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -760,6 +760,9 @@ static void rxstate(struct musb *musb, struct musb_request *req) musb_writew(epio, MUSB_RXCSR, csr); buffer_aint_mapped: + fifo_count = min_t(unsigned int, + request->length - request->actual, + (unsigned int)fifo_count); musb_read_fifo(musb_ep->hw_ep, fifo_count, (u8 *) (request->buf + request->actual)); request->actual += fifo_count; From 3a5a34ed9d68ffb008a60946ac1619b40a111716 Mon Sep 17 00:00:00 2001 From: sunghwan jung Date: Tue, 13 Sep 2022 20:49:13 +0900 Subject: [PATCH 0368/1477] Revert "usb: storage: Add quirk for Samsung Fit flash" [ Upstream commit ad5dbfc123e6ffbbde194e2a4603323e09f741ee ] This reverts commit 86d92f5465958752481269348d474414dccb1552, which fix the timeout issue for "Samsung Fit Flash". But the commit affects not only "Samsung Fit Flash" but also other usb storages that use the same controller and causes severe performance regression. 
# hdparm -t /dev/sda (without the quirk) Timing buffered disk reads: 622 MB in 3.01 seconds = 206.66 MB/sec # hdparm -t /dev/sda (with the quirk) Timing buffered disk reads: 220 MB in 3.00 seconds = 73.32 MB/sec The commit author mentioned that "Issue was reproduced after device has bad block", so this quirk should be applied when we have the timeout issue with a device that has bad blocks. We revert the commit so that we apply this quirk by adding kernel parameters using a bootloader or other ways when we really need it, without the performance regression with devices that don't have the issue. Signed-off-by: sunghwan jung Link: https://lore.kernel.org/r/20220913114913.3073-1-onenowy@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/storage/unusual_devs.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 4993227ab293..20dcbccb290b 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1275,12 +1275,6 @@ UNUSUAL_DEV( 0x090a, 0x1200, 0x0000, 0x9999, USB_SC_RBC, USB_PR_BULK, NULL, 0 ), -UNUSUAL_DEV(0x090c, 0x1000, 0x1100, 0x1100, - "Samsung", - "Flash Drive FIT", - USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_MAX_SECTORS_64), - /* aeb */ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, "Feiya", From e6cc39db24a63f68314473621020ed8cad7be423 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Fri, 9 Sep 2022 19:27:21 +0800 Subject: [PATCH 0369/1477] staging: rtl8723bs: fix a potential memory leak in rtw_init_cmd_priv() [ Upstream commit 708056fba733a73d926772ea4ce9a42d240345da ] In rtw_init_cmd_priv(), if `pcmdpriv->rsp_allocated_buf` fails to be allocated, then `pcmdpriv->cmd_allocated_buf` will not be properly released. Besides, considering there are only two error paths and the first one can directly return, we do not need to implicitly jump to the `exit` tag to execute the error handler.
So this patch added `kfree(pcmdpriv->cmd_allocated_buf);` on the error path to release the resource and simplified the return logic of rtw_init_cmd_priv(). As there is no proper device to test with, no runtime testing was performed. Signed-off-by: Xiaoke Wang Link: https://lore.kernel.org/r/tencent_2B7931B79BA38E22205C5A09EFDF11E48805@qq.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/rtl8723bs/core/rtw_cmd.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_cmd.c b/drivers/staging/rtl8723bs/core/rtw_cmd.c index 2abe205e3453..cee05385f872 100644 --- a/drivers/staging/rtl8723bs/core/rtw_cmd.c +++ b/drivers/staging/rtl8723bs/core/rtw_cmd.c @@ -165,8 +165,6 @@ No irqsave is necessary. int rtw_init_cmd_priv(struct cmd_priv *pcmdpriv) { - int res = 0; - init_completion(&pcmdpriv->cmd_queue_comp); init_completion(&pcmdpriv->terminate_cmdthread_comp); @@ -178,18 +176,16 @@ int rtw_init_cmd_priv(struct cmd_priv *pcmdpriv) pcmdpriv->cmd_allocated_buf = rtw_zmalloc(MAX_CMDSZ + CMDBUFF_ALIGN_SZ); - if (!pcmdpriv->cmd_allocated_buf) { - res = -ENOMEM; - goto exit; - } + if (!pcmdpriv->cmd_allocated_buf) + return -ENOMEM; pcmdpriv->cmd_buf = pcmdpriv->cmd_allocated_buf + CMDBUFF_ALIGN_SZ - ((SIZE_PTR)(pcmdpriv->cmd_allocated_buf) & (CMDBUFF_ALIGN_SZ-1)); pcmdpriv->rsp_allocated_buf = rtw_zmalloc(MAX_RSPSZ + 4); if (!pcmdpriv->rsp_allocated_buf) { - res = -ENOMEM; - goto exit; + kfree(pcmdpriv->cmd_allocated_buf); + return -ENOMEM; } pcmdpriv->rsp_buf = pcmdpriv->rsp_allocated_buf + 4 - ((SIZE_PTR)(pcmdpriv->rsp_allocated_buf) & 3); @@ -199,8 +195,8 @@ int rtw_init_cmd_priv(struct cmd_priv *pcmdpriv) pcmdpriv->rsp_cnt = 0; mutex_init(&pcmdpriv->sctx_mutex); -exit: - return res; + + return 0; } static void c2h_wk_callback(_workitem * work); From b79da0080d81063868d1cf4715d67ed5075dd9cc Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 19 Sep 2022 12:45:08 -0700 Subject: 
[PATCH 0370/1477] nvme: copy firmware_rev on each init [ Upstream commit a8eb6c1ba48bddea82e8d74cbe6e119f006be97d ] The firmware revision can change after a reset so copy the most recent info each time instead of just the first time, otherwise the sysfs firmware_rev entry may contain stale data. Reported-by: Jeff Lien Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Chao Leng Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 265d9199b657..e9c13804760e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2949,7 +2949,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) nvme_init_subnqn(subsys, ctrl, id); memcpy(subsys->serial, id->sn, sizeof(subsys->serial)); memcpy(subsys->model, id->mn, sizeof(subsys->model)); - memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; subsys->awupf = le16_to_cpu(id->awupf); @@ -3110,6 +3109,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->quirks |= core_quirks[i].quirks; } } + memcpy(ctrl->subsys->firmware_rev, id->fr, + sizeof(ctrl->subsys->firmware_rev)); if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n"); From d5bb45f47b37d10f010355686b28c9ebacb361d4 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 21 Sep 2022 00:06:49 +0530 Subject: [PATCH 0371/1477] nvmet-tcp: add bounds check on Transfer Tag [ Upstream commit b6a545ffa2c192b1e6da4a7924edac5ba9f4ea2b ] ttag is used as an index to get cmd in nvmet_tcp_handle_h2c_data_pdu(), add a bounds check to avoid out-of-bounds access.
Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/target/tcp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index e3e35b9bd684..2ddbd4f4f628 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -922,10 +922,17 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) struct nvme_tcp_data_pdu *data = &queue->pdu.data; struct nvmet_tcp_cmd *cmd; - if (likely(queue->nr_cmds)) + if (likely(queue->nr_cmds)) { + if (unlikely(data->ttag >= queue->nr_cmds)) { + pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n", + queue->idx, data->ttag, queue->nr_cmds); + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } cmd = &queue->cmds[data->ttag]; - else + } else { cmd = &queue->connect; + } if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) { pr_err("ttag %u unexpected data offset %u (expected %u)\n", From b8bbae3236ab7dccc66c42bc3f7cdbcfc0786e54 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Thu, 22 Sep 2022 21:48:44 +0800 Subject: [PATCH 0372/1477] usb: idmouse: fix an uninit-value in idmouse_open [ Upstream commit bce2b0539933e485d22d6f6f076c0fcd6f185c4c ] In idmouse_create_image, if any ftip_command fails, it will go to the reset label. However, this leaves the data in bulk_in_buffer[HEADER..IMGSIZE] uninitialized, and the check for a valid image then incurs an uninitialized dereference. Fix this by moving the check before the reset label, since this check is only valid if the data after bulk_in_buffer[HEADER] has concrete data. Note that this was found by KMSAN, so only kernel compilation was tested.
Reported-by: syzbot+79832d33eb89fb3cd092@syzkaller.appspotmail.com Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20220922134847.1101921-1-dzm91@hust.edu.cn Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/misc/idmouse.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index e9437a176518..ea39243efee3 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -177,10 +177,6 @@ static int idmouse_create_image(struct usb_idmouse *dev) bytes_read += bulk_read; } - /* reset the device */ -reset: - ftip_command(dev, FTIP_RELEASE, 0, 0); - /* check for valid image */ /* right border should be black (0x00) */ for (bytes_read = sizeof(HEADER)-1 + WIDTH-1; bytes_read < IMGSIZE; bytes_read += WIDTH) @@ -192,6 +188,10 @@ reset: if (dev->bulk_in_buffer[bytes_read] != 0xFF) return -EAGAIN; + /* reset the device */ +reset: + ftip_command(dev, FTIP_RELEASE, 0, 0); + /* should be IMGSIZE == 65040 */ dev_dbg(&dev->interface->dev, "read %d bytes fingerprint data\n", bytes_read); From 9b4e849777a9a5828844259059981f8f8265687f Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 26 Sep 2022 10:45:09 +0200 Subject: [PATCH 0373/1477] clk: bcm2835: Make peripheral PLLC critical MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6c5422851d8be8c7451e968fd2e6da41b6109e17 ] When testing for a series affecting the VEC, it was discovered that turning off and on the VEC clock is crashing the system. It turns out that, when disabling the VEC clock, it's the only child of the PLLC-per clock which will also get disabled. The source of the crash is PLLC-per being disabled. It's likely that some other device might not take a clock reference that it actually needs, but it's unclear which at this point. Let's make PLLC-per critical so that we don't have that crash. 
Reported-by: Noralf Trønnes Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220926084509.12233-1-maxime@cerno.tech Reviewed-by: Stefan Wahren Acked-by: Noralf Trønnes Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/bcm/clk-bcm2835.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index f306b959297d..b7f89873fcf5 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -1786,7 +1786,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = { .load_mask = CM_PLLC_LOADPER, .hold_mask = CM_PLLC_HOLDPER, .fixed_divider = 1, - .flags = CLK_SET_RATE_PARENT), + .flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT), /* * PLLD is the display PLL, used to drive DSI display panels. From b5dc2f25789dde48e518bfb9ded5cc12a0166fc0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 12 Oct 2022 11:22:58 +0300 Subject: [PATCH 0374/1477] perf intel-pt: Fix segfault in intel_pt_print_info() with uClibc commit 5a3d47071f0ced0431ef82a5fb6bd077ed9493db upstream. uClibc segfaulted because NULL was passed as the format to fprintf(). That happened because one of the format strings was missing and intel_pt_print_info() didn't check that before calling fprintf(). Add the missing format string, and check format is not NULL before calling fprintf(). 
Fixes: 11fa7cb86b56d361 ("perf tools: Pass Intel PT information for decoding MTC and CYC") Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221012082259.22394-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 5163d2ffea70..453773ce6f45 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3279,6 +3279,7 @@ static const char * const intel_pt_info_fmts[] = { [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", + [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n", [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", @@ -3293,8 +3294,12 @@ static void intel_pt_print_info(__u64 *arr, int start, int finish) if (!dump_trace) return; - for (i = start; i <= finish; i++) - fprintf(stdout, intel_pt_info_fmts[i], arr[i]); + for (i = start; i <= finish; i++) { + const char *fmt = intel_pt_info_fmts[i]; + + if (fmt) + fprintf(stdout, fmt, arr[i]); + } } static void intel_pt_print_info_str(const char *name, const char *str) From 904f881b57360cf85de962d84d8614d94431f60e Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Fri, 16 Sep 2022 23:17:07 +0300 Subject: [PATCH 0375/1477] arm64: topology: fix possible overflow in amu_fie_setup() commit d4955c0ad77dbc684fc716387070ac24801b8bca upstream. cpufreq_get_hw_max_freq() returns max frequency in kHz as *unsigned int*, while freq_inv_set_max_ratio() gets passed this frequency in Hz as 'u64'. 
Multiplying max frequency by 1000 can potentially result in overflow -- multiplying by 1000ULL instead should avoid that... Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: cd0ed03a8903 ("arm64: use activity monitors for frequency invariance") Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/01493d64-2bce-d968-86dc-11a122a9c07d@omp.ru Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 543c67cae02f..4358bc319306 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -158,7 +158,7 @@ static int validate_cpu_freq_invariance_counters(int cpu) } /* Convert maximum frequency from KHz to Hz and validate */ - max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000; + max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000ULL; if (unlikely(!max_freq_hz)) { pr_debug("CPU%d: invalid maximum frequency.\n", cpu); return -EINVAL; From 67cbc8865a66533fa08c1c13fe9acbaaae63c403 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 16 Oct 2022 23:31:25 +0100 Subject: [PATCH 0376/1477] io_uring: correct pinned_vm accounting [ upstream commit 42b6419d0aba47c5d8644cdc0b68502254671de5 ] ->mm_account should be released only after we free all registered buffers, otherwise __io_sqe_buffers_unregister() will see a NULL ->mm_account and skip locked_vm accounting. 
Cc: Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6d798f65ed4ab8db3664c4d3397d4af16ca98846.1664849932.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 9654b60a06a5..b82a446d5e59 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8436,8 +8436,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) if (ctx->sqo_task) { put_task_struct(ctx->sqo_task); ctx->sqo_task = NULL; - mmdrop(ctx->mm_account); - ctx->mm_account = NULL; } #ifdef CONFIG_BLK_CGROUP @@ -8456,6 +8454,11 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) } #endif + if (ctx->mm_account) { + mmdrop(ctx->mm_account); + ctx->mm_account = NULL; + } + io_mem_free(ctx->rings); io_mem_free(ctx->sq_sqes); From c378c479c5175833bb22ff71974cda47d7b05401 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 16 Oct 2022 23:31:26 +0100 Subject: [PATCH 0377/1477] io_uring/af_unix: defer registered files gc to io_uring release [ upstream commit 0091bfc81741b8d3aeb3b7ab8636f911b2de6e80 ] Instead of putting io_uring's registered files in unix_gc() we want it to be done by io_uring itself. The trick here is to consider io_uring registered files for cycle detection but not actually putting them down. Because io_uring can't register other ring instances, this will remove all refs to the ring file triggering the ->release path and clean up with io_ring_ctx_free(). 
Cc: stable@vger.kernel.org Fixes: 6b06314c47e1 ("io_uring: add file set registration") Reported-and-tested-by: David Bouman Signed-off-by: Pavel Begunkov Signed-off-by: Thadeu Lima de Souza Cascardo [axboe: add kerneldoc comment to skb, fold in skb leak fix] Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 1 + include/linux/skbuff.h | 2 ++ net/unix/garbage.c | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index b82a446d5e59..05f360b66b07 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7301,6 +7301,7 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) } skb->sk = sk; + skb->scm_io_uring = 1; nr_files = 0; fpl->user = get_uid(ctx->user); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 61fc053a4a4e..462b0e3ef2b2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -681,6 +681,7 @@ typedef unsigned char *sk_buff_data_t; * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) + * @scm_io_uring: SKB holds io_uring registered files * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @napi_id: id of the NAPI struct this skb came from @@ -858,6 +859,7 @@ struct sk_buff { #ifdef CONFIG_TLS_DEVICE __u8 decrypted:1; #endif + __u8 scm_io_uring:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ diff --git a/net/unix/garbage.c b/net/unix/garbage.c index d45d5366115a..dc2763540393 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -204,6 +204,7 @@ void wait_for_unix_gc(void) /* The external entry point: unix_gc() */ void unix_gc(void) { + struct sk_buff *next_skb, *skb; struct unix_sock *u; struct unix_sock *next; struct sk_buff_head hitlist; @@ -297,11 +298,30 @@ void unix_gc(void) spin_unlock(&unix_gc_lock); + /* We need io_uring to clean its registered files, ignore all io_uring + * 
originated skbs. It's fine as io_uring doesn't keep references to + * other io_uring instances and so killing all other files in the cycle + * will put all io_uring references forcing it to go through normal + * release.path eventually putting registered files. + */ + skb_queue_walk_safe(&hitlist, skb, next_skb) { + if (skb->scm_io_uring) { + __skb_unlink(skb, &hitlist); + skb_queue_tail(&skb->sk->sk_receive_queue, skb); + } + } + /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); spin_lock(&unix_gc_lock); + /* There could be io_uring registered files, just push them back to + * the inflight list + */ + list_for_each_entry_safe(u, next, &gc_candidates, link) + list_move_tail(&u->link, &gc_inflight_list); + /* All candidates should have been detached by now. */ BUG_ON(!list_empty(&gc_candidates)); From 45c33966759ea1b4040c08dacda99ef623c0ca29 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 23 Sep 2022 12:21:13 +0800 Subject: [PATCH 0378/1477] mm: hugetlb: fix UAF in hugetlb_handle_userfault commit 958f32ce832ba781ac20e11bb2d12a9352ea28fc upstream. The vma_lock and hugetlb_fault_mutex are dropped before handling userfault and reacquire them again after handle_userfault(), but reacquire the vma_lock could lead to UAF[1,2] due to the following race, hugetlb_fault hugetlb_no_page /*unlock vma_lock */ hugetlb_handle_userfault handle_userfault /* unlock mm->mmap_lock*/ vm_mmap_pgoff do_mmap mmap_region munmap_vma_range /* clean old vma */ /* lock vma_lock again <--- UAF */ /* unlock vma_lock */ Since the vma_lock will unlock immediately after hugetlb_handle_userfault(), let's drop the unneeded lock and unlock in hugetlb_handle_userfault() to fix the issue. 
[1] https://lore.kernel.org/linux-mm/000000000000d5e00a05e834962e@google.com/ [2] https://lore.kernel.org/linux-mm/20220921014457.1668-1-liuzixian4@huawei.com/ Link: https://lkml.kernel.org/r/20220923042113.137273-1-liushixin2@huawei.com Fixes: 1a1aad8a9b7b ("userfaultfd: hugetlbfs: add userfaultfd hugetlb hook") Signed-off-by: Liu Shixin Signed-off-by: Kefeng Wang Reported-by: syzbot+193f9cee8638750b23cf@syzkaller.appspotmail.com Reported-by: Liu Zixian Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: John Hubbard Cc: Muchun Song Cc: Sidhartha Kumar Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c42c76447e10..c57c165bfbbc 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4337,6 +4337,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, spinlock_t *ptl; unsigned long haddr = address & huge_page_mask(h); bool new_page = false; + u32 hash = hugetlb_fault_mutex_hash(mapping, idx); /* * Currently, we are forced to kill the process in the event the @@ -4346,7 +4347,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) { pr_warn_ratelimited("PID %d killed due to inadequate hugepage pool\n", current->pid); - return ret; + goto out; } /* @@ -4365,7 +4366,6 @@ retry: * Check for page in userfault range */ if (userfaultfd_missing(vma)) { - u32 hash; struct vm_fault vmf = { .vma = vma, .address = haddr, @@ -4380,17 +4380,14 @@ retry: }; /* - * hugetlb_fault_mutex and i_mmap_rwsem must be - * dropped before handling userfault. Reacquire - * after handling fault to make calling code simpler. + * vma_lock and hugetlb_fault_mutex must be dropped + * before handling userfault. Also mmap_lock will + * be dropped during handling userfault, any vma + * operation should be careful from here. 
*/ - hash = hugetlb_fault_mutex_hash(mapping, idx); mutex_unlock(&hugetlb_fault_mutex_table[hash]); i_mmap_unlock_read(mapping); - ret = handle_userfault(&vmf, VM_UFFD_MISSING); - i_mmap_lock_read(mapping); - mutex_lock(&hugetlb_fault_mutex_table[hash]); - goto out; + return handle_userfault(&vmf, VM_UFFD_MISSING); } page = alloc_huge_page(vma, haddr, 0); @@ -4497,6 +4494,8 @@ retry: unlock_page(page); out: + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + i_mmap_unlock_read(mapping); return ret; backout: @@ -4592,10 +4591,12 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, mutex_lock(&hugetlb_fault_mutex_table[hash]); entry = huge_ptep_get(ptep); - if (huge_pte_none(entry)) { - ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep, flags); - goto out_mutex; - } + if (huge_pte_none(entry)) + /* + * hugetlb_no_page will drop vma lock and hugetlb fault + * mutex internally, which make us return immediately. + */ + return hugetlb_no_page(mm, vma, mapping, idx, address, ptep, flags); ret = 0; From dc54ff9fc4a4886f4cd409054c6505ca352a146a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 5 Oct 2022 22:02:37 -0400 Subject: [PATCH 0379/1477] net: ieee802154: return -EINVAL for unknown addr type commit 30393181fdbc1608cc683b4ee99dcce05ffcc8c7 upstream. This patch adds handling to return -EINVAL for an unknown addr type. The current behaviour is to return 0 as successful but the size of an unknown addr type is not defined and should return an error like -EINVAL. Fixes: 94160108a70c ("net/ieee802154: fix uninit value bug in dgram_sendmsg") Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ieee802154_netdev.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index a8994f307fc3..03b64bf876a4 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -185,21 +185,27 @@ static inline int ieee802154_sockaddr_check_size(struct sockaddr_ieee802154 *daddr, int len) { struct ieee802154_addr_sa *sa; + int ret = 0; sa = &daddr->addr; if (len < IEEE802154_MIN_NAMELEN) return -EINVAL; switch (sa->addr_type) { + case IEEE802154_ADDR_NONE: + break; case IEEE802154_ADDR_SHORT: if (len < IEEE802154_NAMELEN_SHORT) - return -EINVAL; + ret = -EINVAL; break; case IEEE802154_ADDR_LONG: if (len < IEEE802154_NAMELEN_LONG) - return -EINVAL; + ret = -EINVAL; + break; + default: + ret = -EINVAL; break; } - return 0; + return ret; } static inline void ieee802154_addr_from_sa(struct ieee802154_addr *a, From a96336a5f28b91eba4c5575e0434cbf02d8597fd Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Tue, 4 Oct 2022 21:47:49 -0400 Subject: [PATCH 0380/1477] Revert "net/ieee802154: reject zero-sized raw_sendmsg()" [ Upstream commit 2eb2756f6c9e9621e022d78321ce40a62c4520b5 ] This reverts commit 3a4d061c699bd3eedc80dc97a4b2a2e1af83c6f5. There is a v2 which does return zero if zero length is given. 
Signed-off-by: Alexander Aring Link: https://lore.kernel.org/r/20221005014750.3685555-1-aahringo@redhat.com Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin --- net/ieee802154/socket.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index d4c162d63634..7edec210780a 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -252,9 +252,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) return -EOPNOTSUPP; } - if (!size) - return -EINVAL; - lock_sock(sk); if (!sk->sk_bound_dev_if) dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); From 791489a5c56396ddfed75fc525066d4738dace46 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 4 Oct 2022 21:47:50 -0400 Subject: [PATCH 0381/1477] net/ieee802154: don't warn zero-sized raw_sendmsg() [ Upstream commit b12e924a2f5b960373459c8f8a514f887adf5cac ] syzbot is hitting skb_assert_len() warning at __dev_queue_xmit() [1], for PF_IEEE802154 socket's zero-sized raw_sendmsg() request is hitting __dev_queue_xmit() with skb->len == 0. Since PF_IEEE802154 socket's zero-sized raw_sendmsg() request was able to return 0, don't call __dev_queue_xmit() if packet length is 0. ---------- #include #include int main(int argc, char *argv[]) { struct sockaddr_in addr = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_LOOPBACK) }; struct iovec iov = { }; struct msghdr hdr = { .msg_name = &addr, .msg_namelen = sizeof(addr), .msg_iov = &iov, .msg_iovlen = 1 }; sendmsg(socket(PF_IEEE802154, SOCK_RAW, 0), &hdr, 0); return 0; } ---------- Note that this might be a sign that commit fd1894224407c484 ("bpf: Don't redirect packets with invalid pkt_len") should be reverted, for skb->len == 0 was acceptable for at least PF_IEEE802154 socket. 
Link: https://syzkaller.appspot.com/bug?extid=5ea725c25d06fb9114c4 [1] Reported-by: syzbot Fixes: fd1894224407c484 ("bpf: Don't redirect packets with invalid pkt_len") Signed-off-by: Tetsuo Handa Signed-off-by: Alexander Aring Link: https://lore.kernel.org/r/20221005014750.3685555-2-aahringo@redhat.com Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin --- net/ieee802154/socket.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 7edec210780a..ecc0d5fbde04 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -273,6 +273,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) err = -EMSGSIZE; goto out_dev; } + if (!size) { + err = 0; + goto out_dev; + } hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; From 98ab15bfdcda748aa1ecf2d91dede893934bca60 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 17 Oct 2022 19:07:45 -0600 Subject: [PATCH 0382/1477] Revert "drm/amdgpu: move nbio sdma_doorbell_range() into sdma code for vega" This reverts commit 9f55f36f749a7608eeef57d7d72991a9bd557341 which is commit e3163bc8ffdfdb405e10530b140135b2ee487f89 upstream. This commit causes repeated WARN_ONs from drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amd gpu_dm.c:7391 amdgpu_dm_atomic_commit_tail+0x23b9/0x2430 [amdgpu] dmesg fills up with the following messages and drm initialization takes a very long time. 
Cc: # 5.10 Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 5 ----- drivers/gpu/drm/amd/amdgpu/soc15.c | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index a1a8e026b9fa..1f2e2460e121 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1475,11 +1475,6 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) WREG32_SDMA(i, mmSDMA0_CNTL, temp); if (!amdgpu_sriov_vf(adev)) { - ring = &adev->sdma.instance[i].ring; - adev->nbio.funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); - /* unhalt engine */ temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL); temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index abd649285a22..7212b9900e0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1332,6 +1332,25 @@ static int soc15_common_sw_fini(void *handle) return 0; } +static void soc15_doorbell_range_init(struct amdgpu_device *adev) +{ + int i; + struct amdgpu_ring *ring; + + /* sdma/ih doorbell range are programed by hypervisor */ + if (!amdgpu_sriov_vf(adev)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + adev->nbio.funcs->sdma_doorbell_range(adev, i, + ring->use_doorbell, ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); + } + + adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->irq.ih.doorbell_index); + } +} + static int soc15_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1351,6 +1370,12 @@ static int soc15_common_hw_init(void *handle) /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); + /* HW 
doorbell routing policy: doorbell writing not + * in SDMA/IH/MM/ACV range will be routed to CP. So + * we need to init SDMA/IH/MM/ACV doorbell range prior + * to CP ip block init and ring test. + */ + soc15_doorbell_range_init(adev); return 0; } From 357db159e965efa6a0b7b6f9efa1c34bf876db2d Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 17 Oct 2022 19:07:46 -0600 Subject: [PATCH 0383/1477] Revert "drm/amdgpu: use dirty framebuffer helper" This reverts commit 867b2b2b6802fb3995a0065fc39e0e7e20d8004d which is commit 66f99628eb24409cb8feb5061f78283c8b65f820 upstream. With this commit, dmesg fills up with the following messages and drm initialization takes a very long time. This commit has been reverted from 5.4 [drm] Fence fallback timer expired on ring sdma0 [drm] Fence fallback timer expired on ring gfx [drm] Fence fallback timer expired on ring sdma0 [drm] Fence fallback timer expired on ring gfx [drm] Fence fallback timer expired on ring sdma0 [drm] Fence fallback timer expired on ring sdma0 [drm] Fence fallback timer expired on ring sdma0 [drm] Fence fallback timer expired on ring gfx Cc: # 5.10 Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 947f50e402ba..7cc7af2a6822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -499,7 +498,6 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, - .dirty = drm_atomic_helper_dirtyfb, }; uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, From 67de22cb0b6c6a6a71a472dbe8f0b3f3eb60565d Mon Sep 17 00:00:00
2001 From: =?UTF-8?q?Jerry=20Lee=20=E6=9D=8E=E4=BF=AE=E8=B3=A2?= Date: Mon, 18 Jul 2022 10:25:19 +0000 Subject: [PATCH 0384/1477] ext4: continue to expand file system when the target size doesn't reach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit df3cb754d13d2cd5490db9b8d536311f8413a92e upstream. When expanding a file system from (16TiB-2MiB) to 18TiB, the operation exits early which leads to result inconsistency between resize2fs and Ext4 kernel driver. === before === ○ → resize2fs /dev/mapper/thin resize2fs 1.45.5 (07-Jan-2020) Filesystem at /dev/mapper/thin is mounted on /mnt/test; on-line resizing required old_desc_blocks = 2048, new_desc_blocks = 2304 The filesystem on /dev/mapper/thin is now 4831837696 (4k) blocks long. [ 865.186308] EXT4-fs (dm-5): mounted filesystem with ordered data mode. Opts: (null). Quota mode: none. [ 912.091502] dm-4: detected capacity change from 34359738368 to 38654705664 [ 970.030550] dm-5: detected capacity change from 34359734272 to 38654701568 [ 1000.012751] EXT4-fs (dm-5): resizing filesystem from 4294966784 to 4831837696 blocks [ 1000.012878] EXT4-fs (dm-5): resized filesystem to 4294967296 === after === [ 129.104898] EXT4-fs (dm-5): mounted filesystem with ordered data mode. Opts: (null). Quota mode: none. 
[ 143.773630] dm-4: detected capacity change from 34359738368 to 38654705664 [ 198.203246] dm-5: detected capacity change from 34359734272 to 38654701568 [ 207.918603] EXT4-fs (dm-5): resizing filesystem from 4294966784 to 4831837696 blocks [ 207.918754] EXT4-fs (dm-5): resizing filesystem from 4294967296 to 4831837696 blocks [ 207.918758] EXT4-fs (dm-5): Converting file system to meta_bg [ 207.918790] EXT4-fs (dm-5): resizing filesystem from 4294967296 to 4831837696 blocks [ 221.454050] EXT4-fs (dm-5): resized to 4658298880 blocks [ 227.634613] EXT4-fs (dm-5): resized filesystem to 4831837696 Signed-off-by: Jerry Lee Link: https://lore.kernel.org/r/PU1PR04MB22635E739BD21150DC182AC6A18C9@PU1PR04MB2263.apcprd04.prod.outlook.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f6409ddfd117..c55ba0390021 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2068,7 +2068,7 @@ retry: goto out; } - if (ext4_blocks_count(es) == n_blocks_count) + if (ext4_blocks_count(es) == n_blocks_count && n_blocks_count_retry == 0) goto out; err = ext4_alloc_flex_bg_array(sb, n_group + 1); From f039b43cbaea5e0700980c2f0052da05a70782e0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Dec 2021 06:33:30 -0800 Subject: [PATCH 0385/1477] inet: fully convert sk->sk_rx_dst to RCU rules commit 8f905c0e7354ef261360fb7535ea079b1082c105 upstream. syzbot reported various issues around early demux, one being included in this changelog [1] sk->sk_rx_dst is using RCU protection without clearly documenting it. And following sequences in tcp_v4_do_rcv()/tcp_v6_do_rcv() are not following standard RCU rules. [a] dst_release(dst); [b] sk->sk_rx_dst = NULL; They look wrong because a delete operation of RCU protected pointer is supposed to clear the pointer before the call_rcu()/synchronize_rcu() guarding actual memory freeing. 
In some cases indeed, dst could be freed before [b] is done. We could cheat by clearing sk_rx_dst before calling dst_release(), but this seems the right time to stick to standard RCU annotations and debugging facilities. [1] BUG: KASAN: use-after-free in dst_check include/net/dst.h:470 [inline] BUG: KASAN: use-after-free in tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792 Read of size 2 at addr ffff88807f1cb73a by task syz-executor.5/9204 CPU: 0 PID: 9204 Comm: syz-executor.5 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0x8d/0x320 mm/kasan/report.c:247 __kasan_report mm/kasan/report.c:433 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:450 dst_check include/net/dst.h:470 [inline] tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792 ip_rcv_finish_core.constprop.0+0x15de/0x1e80 net/ipv4/ip_input.c:340 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline] ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline] __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556 __netif_receive_skb_list net/core/dev.c:5608 [inline] netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699 gro_normal_list net/core/dev.c:5853 [inline] gro_normal_list net/core/dev.c:5849 [inline] napi_complete_done+0x1f1/0x880 net/core/dev.c:6590 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557 __napi_poll+0xaf/0x440 net/core/dev.c:7023 napi_poll net/core/dev.c:7090 [inline] net_rx_action+0x801/0xb40 net/core/dev.c:7177 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x123/0x180 kernel/softirq.c:637 
irq_exit_rcu+0x5/0x20 kernel/softirq.c:649 common_interrupt+0x52/0xc0 arch/x86/kernel/irq.c:240 asm_common_interrupt+0x1e/0x40 arch/x86/include/asm/idtentry.h:629 RIP: 0033:0x7f5e972bfd57 Code: 39 d1 73 14 0f 1f 80 00 00 00 00 48 8b 50 f8 48 83 e8 08 48 39 ca 77 f3 48 39 c3 73 3e 48 89 13 48 8b 50 f8 48 89 38 49 8b 0e <48> 8b 3e 48 83 c3 08 48 83 c6 08 eb bc 48 39 d1 72 9e 48 39 d0 73 RSP: 002b:00007fff8a413210 EFLAGS: 00000283 RAX: 00007f5e97108990 RBX: 00007f5e97108338 RCX: ffffffff81d3aa45 RDX: ffffffff81d3aa45 RSI: 00007f5e97108340 RDI: ffffffff81d3aa45 RBP: 00007f5e97107eb8 R08: 00007f5e97108d88 R09: 0000000093c2e8d9 R10: 0000000000000000 R11: 0000000000000000 R12: 00007f5e97107eb0 R13: 00007f5e97108338 R14: 00007f5e97107ea8 R15: 0000000000000019 Allocated by task 13: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:434 [inline] __kasan_slab_alloc+0x90/0xc0 mm/kasan/common.c:467 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slub.c:3234 [inline] slab_alloc mm/slub.c:3242 [inline] kmem_cache_alloc+0x202/0x3a0 mm/slub.c:3247 dst_alloc+0x146/0x1f0 net/core/dst.c:92 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613 ip_route_input_slow+0x1817/0x3a20 net/ipv4/route.c:2340 ip_route_input_rcu net/ipv4/route.c:2470 [inline] ip_route_input_noref+0x116/0x2a0 net/ipv4/route.c:2415 ip_rcv_finish_core.constprop.0+0x288/0x1e80 net/ipv4/ip_input.c:354 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline] ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline] __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556 __netif_receive_skb_list net/core/dev.c:5608 [inline] netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699 gro_normal_list net/core/dev.c:5853 [inline] gro_normal_list net/core/dev.c:5849 [inline] 
napi_complete_done+0x1f1/0x880 net/core/dev.c:6590 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557 __napi_poll+0xaf/0x440 net/core/dev.c:7023 napi_poll net/core/dev.c:7090 [inline] net_rx_action+0x801/0xb40 net/core/dev.c:7177 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Freed by task 13: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:46 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free mm/kasan/common.c:328 [inline] __kasan_slab_free+0xff/0x130 mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:235 [inline] slab_free_hook mm/slub.c:1723 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1749 slab_free mm/slub.c:3513 [inline] kmem_cache_free+0xbd/0x5d0 mm/slub.c:3530 dst_destroy+0x2d6/0x3f0 net/core/dst.c:127 rcu_do_batch kernel/rcu/tree.c:2506 [inline] rcu_core+0x7ab/0x1470 kernel/rcu/tree.c:2741 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Last potentially related work creation: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 __kasan_record_aux_stack+0xf5/0x120 mm/kasan/generic.c:348 __call_rcu kernel/rcu/tree.c:2985 [inline] call_rcu+0xb1/0x740 kernel/rcu/tree.c:3065 dst_release net/core/dst.c:177 [inline] dst_release+0x79/0xe0 net/core/dst.c:167 tcp_v4_do_rcv+0x612/0x8d0 net/ipv4/tcp_ipv4.c:1712 sk_backlog_rcv include/net/sock.h:1030 [inline] __release_sock+0x134/0x3b0 net/core/sock.c:2768 release_sock+0x54/0x1b0 net/core/sock.c:3300 tcp_sendmsg+0x36/0x40 net/ipv4/tcp.c:1441 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 sock_write_iter+0x289/0x3c0 net/socket.c:1057 call_write_iter include/linux/fs.h:2162 [inline] new_sync_write+0x429/0x660 fs/read_write.c:503 vfs_write+0x7cd/0xae0 fs/read_write.c:590 ksys_write+0x1ee/0x250 fs/read_write.c:643 do_syscall_x64 
arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88807f1cb700 which belongs to the cache ip_dst_cache of size 176 The buggy address is located 58 bytes inside of 176-byte region [ffff88807f1cb700, ffff88807f1cb7b0) The buggy address belongs to the page: page:ffffea0001fc72c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x7f1cb flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 dead000000000100 dead000000000122 ffff8881413bb780 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_HARDWALL), pid 5, ts 108466983062, free_ts 108048976062 prep_new_page mm/page_alloc.c:2418 [inline] get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4149 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5369 alloc_pages+0x1a7/0x300 mm/mempolicy.c:2191 alloc_slab_page mm/slub.c:1793 [inline] allocate_slab mm/slub.c:1930 [inline] new_slab+0x32d/0x4a0 mm/slub.c:1993 ___slab_alloc+0x918/0xfe0 mm/slub.c:3022 __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3109 slab_alloc_node mm/slub.c:3200 [inline] slab_alloc mm/slub.c:3242 [inline] kmem_cache_alloc+0x35c/0x3a0 mm/slub.c:3247 dst_alloc+0x146/0x1f0 net/core/dst.c:92 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613 __mkroute_output net/ipv4/route.c:2564 [inline] ip_route_output_key_hash_rcu+0x921/0x2d00 net/ipv4/route.c:2791 ip_route_output_key_hash+0x18b/0x300 net/ipv4/route.c:2619 __ip_route_output_key include/net/route.h:126 [inline] ip_route_output_flow+0x23/0x150 net/ipv4/route.c:2850 ip_route_output_key include/net/route.h:142 [inline] geneve_get_v4_rt+0x3a6/0x830 drivers/net/geneve.c:809 geneve_xmit_skb drivers/net/geneve.c:899 [inline] 
geneve_xmit+0xc4a/0x3540 drivers/net/geneve.c:1082 __netdev_start_xmit include/linux/netdevice.h:4994 [inline] netdev_start_xmit include/linux/netdevice.h:5008 [inline] xmit_one net/core/dev.c:3590 [inline] dev_hard_start_xmit+0x1eb/0x920 net/core/dev.c:3606 __dev_queue_xmit+0x299a/0x3650 net/core/dev.c:4229 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1338 [inline] free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1389 free_unref_page_prepare mm/page_alloc.c:3309 [inline] free_unref_page+0x19/0x690 mm/page_alloc.c:3388 qlink_free mm/kasan/quarantine.c:146 [inline] qlist_free_all+0x5a/0xc0 mm/kasan/quarantine.c:165 kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:272 __kasan_slab_alloc+0xa2/0xc0 mm/kasan/common.c:444 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slub.c:3234 [inline] kmem_cache_alloc_node+0x255/0x3f0 mm/slub.c:3270 __alloc_skb+0x215/0x340 net/core/skbuff.c:414 alloc_skb include/linux/skbuff.h:1126 [inline] alloc_skb_with_frags+0x93/0x620 net/core/skbuff.c:6078 sock_alloc_send_pskb+0x783/0x910 net/core/sock.c:2575 mld_newpack+0x1df/0x770 net/ipv6/mcast.c:1754 add_grhead+0x265/0x330 net/ipv6/mcast.c:1857 add_grec+0x1053/0x14e0 net/ipv6/mcast.c:1995 mld_send_initial_cr.part.0+0xf6/0x230 net/ipv6/mcast.c:2242 mld_send_initial_cr net/ipv6/mcast.c:1232 [inline] mld_dad_work+0x1d3/0x690 net/ipv6/mcast.c:2268 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298 worker_thread+0x658/0x11f0 kernel/workqueue.c:2445 Memory state around the buggy address: ffff88807f1cb600: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88807f1cb680: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc >ffff88807f1cb700: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88807f1cb780: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc ffff88807f1cb800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 41063e9dd119 ("ipv4: Early 
TCP socket demux.") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20211220143330.680945-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski [cmllamas: fixed trivial merge conflict] Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/tcp.c | 3 +-- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 11 +++++++---- net/ipv4/udp.c | 6 +++--- net/ipv6/tcp_ipv6.c | 11 +++++++---- net/ipv6/udp.c | 4 ++-- 8 files changed, 23 insertions(+), 18 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index d53fb6437476..90a8b8b26a20 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -421,7 +421,7 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct dst_entry *sk_rx_dst; + struct dst_entry __rcu *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; atomic_t sk_omem_alloc; int sk_sndbuf; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a733ce1a3f8f..87d73a3e92ba 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -158,7 +158,7 @@ void inet_sock_destruct(struct sock *sk) kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); - dst_release(sk->sk_rx_dst); + dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1)); sk_refcnt_debug_dec(sk); } EXPORT_SYMBOL(inet_sock_destruct); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b0aa7cc69d51..a7127364253c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2816,8 +2816,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - dst_release(sk->sk_rx_dst); - sk->sk_rx_dst = NULL; + dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL)); tcp_saved_syn_free(tp); tp->compressed_ack = 0; tp->segs_in = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4ecd85b1e806..377cba9b124d 100644 --- 
a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5777,7 +5777,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) trace_tcp_probe(sk, skb); tcp_mstamp_refresh(tp); - if (unlikely(!sk->sk_rx_dst)) + if (unlikely(!rcu_access_pointer(sk->sk_rx_dst))) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0d165ce2d80a..5c1e6b0687e2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1670,15 +1670,18 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) struct sock *rsk; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst; + + dst = rcu_dereference_protected(sk->sk_rx_dst, + lockdep_sock_is_held(sk)); sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || !dst->ops->check(dst, 0)) { + RCU_INIT_POINTER(sk->sk_rx_dst, NULL); dst_release(dst); - sk->sk_rx_dst = NULL; } } tcp_rcv_established(sk, skb); @@ -1753,7 +1756,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { - struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); @@ -2162,7 +2165,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); if (dst && dst_hold_safe(dst)) { - sk->sk_rx_dst = dst; + rcu_assign_pointer(sk->sk_rx_dst, dst); inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e498c7666ec6..4446aa8237ff 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2193,7 +2193,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old; if (dst_hold_safe(dst)) { - old = xchg(&sk->sk_rx_dst, dst); + old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst); dst_release(old); return old != 
dst; } @@ -2383,7 +2383,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst != dst)) + if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp_sk_rx_dst_set(sk, dst); ret = udp_unicast_rcv_skb(sk, skb, uh); @@ -2541,7 +2541,7 @@ int udp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = READ_ONCE(sk->sk_rx_dst); + dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8d91f36cb11b..c14eaec64a0b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -107,7 +107,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) if (dst && dst_hold_safe(dst)) { const struct rt6_info *rt = (const struct rt6_info *)dst; - sk->sk_rx_dst = dst; + rcu_assign_pointer(sk->sk_rx_dst, dst); inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); } @@ -1482,15 +1482,18 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst; + + dst = rcu_dereference_protected(sk->sk_rx_dst, + lockdep_sock_is_held(sk)); sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || dst->ops->check(dst, np->rx_dst_cookie) == NULL) { + RCU_INIT_POINTER(sk->sk_rx_dst, NULL); dst_release(dst); - sk->sk_rx_dst = NULL; } } @@ -1842,7 +1845,7 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { - struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c 
index 4e90e5a52945..9b504bf49214 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -941,7 +941,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst != dst)) + if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp6_sk_rx_dst_set(sk, dst); if (!uh->check && !udp_sk(sk)->no_check6_rx) { @@ -1055,7 +1055,7 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = READ_ONCE(sk->sk_rx_dst); + dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); From 7b19858803d7f2f61449a62116721e8230e6d2fd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 13 Oct 2022 14:50:28 +0200 Subject: [PATCH 0386/1477] thermal: intel_powerclamp: Use first online CPU as control_cpu commit 4bb7f6c2781e46fc5bd00475a66df2ea30ef330d upstream. Commit 68b99e94a4a2 ("thermal: intel_powerclamp: Use get_cpu() instead of smp_processor_id() to avoid crash") fixed an issue related to using smp_processor_id() in preemptible context by replacing it with a pair of get_cpu()/put_cpu(), but what is needed there really is any online CPU and not necessarily the one currently running the code. Arguably, getting the one that's running the code in there is confusing. For this reason, simply give the control CPU role to the first online one which automatically will be CPU0 if it is online, so one check can be dropped from the code for an added benefit. Link: https://lore.kernel.org/linux-pm/20221011113646.GA12080@duo.ucw.cz/ Fixes: 68b99e94a4a2 ("thermal: intel_powerclamp: Use get_cpu() instead of smp_processor_id() to avoid crash") Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Chen Yu Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/intel/intel_powerclamp.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 14381f7587ff..fb04470d7d4b 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -531,11 +531,7 @@ static int start_power_clamp(void) get_online_cpus(); /* prefer BSP */ - control_cpu = 0; - if (!cpu_online(control_cpu)) { - control_cpu = get_cpu(); - put_cpu(); - } + control_cpu = cpumask_first(cpu_online_mask); clamping = true; schedule_delayed_work(&poll_pkg_cstate_work, 0); From cbf2c43b36e0c439fe4c47dd1cec78b18d54e980 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 29 Sep 2021 03:19:14 +0800 Subject: [PATCH 0387/1477] f2fs: fix wrong condition to trigger background checkpoint correctly commit cd6d697a6e2013a0a85f8b261b16c8cfd50c1f5f upstream. In f2fs_balance_fs_bg(), it needs to check both NAT_ENTRIES and INO_ENTRIES memory usage to decide whether we should skip background checkpoint, otherwise we may always skip checking INO_ENTRIES memory usage, so that INO_ENTRIES may potentially cause high memory footprint. 
Fixes: 493720a48543 ("f2fs: fix to avoid REQ_TIME and CP_TIME collision") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3123fd49c8ce..68774d6198a5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -550,7 +550,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) goto do_sync; /* checkpoint is the only way to shrink partial cached entries */ - if (f2fs_available_free_memory(sbi, NAT_ENTRIES) || + if (f2fs_available_free_memory(sbi, NAT_ENTRIES) && f2fs_available_free_memory(sbi, INO_ENTRIES)) return; From 8026d58b495a2c1143b126b9161113696ec4e77e Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Thu, 13 Oct 2022 09:40:59 +0200 Subject: [PATCH 0388/1477] gcov: support GCC 12.1 and newer compilers commit 977ef30a7d888eeb52fb6908f99080f33e5309a8 upstream. Starting with GCC 12.1, the created .gcda format can't be read by gcov tool. There are 2 significant changes to the .gcda file format that need to be supported: a) [gcov: Use system IO buffering] (23eb66d1d46a34cb28c4acbdf8a1deb80a7c5a05) changed that all sizes in the format are in bytes and not in words (4B) b) [gcov: make profile merging smarter] (72e0c742bd01f8e7e6dcca64042b9ad7e75979de) add a new checksum to the file header. Tested with GCC 7.5, 10.4, 12.2 and the current master. 
Link: https://lkml.kernel.org/r/624bda92-f307-30e9-9aaa-8cc678b2dfb2@suse.cz Signed-off-by: Martin Liska Tested-by: Peter Oberparleiter Reviewed-by: Peter Oberparleiter Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- kernel/gcov/gcc_4_7.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index 53c67c87f141..c699feda21ac 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -33,6 +33,13 @@ #define GCOV_TAG_FUNCTION_LENGTH 3 +/* Since GCC 12.1 sizes are in BYTES and not in WORDS (4B). */ +#if (__GNUC__ >= 12) +#define GCOV_UNIT_SIZE 4 +#else +#define GCOV_UNIT_SIZE 1 +#endif + static struct gcov_info *gcov_info_head; /** @@ -451,12 +458,18 @@ static size_t convert_to_gcda(char *buffer, struct gcov_info *info) pos += store_gcov_u32(buffer, pos, info->version); pos += store_gcov_u32(buffer, pos, info->stamp); +#if (__GNUC__ >= 12) + /* Use zero as checksum of the compilation unit. */ + pos += store_gcov_u32(buffer, pos, 0); +#endif + for (fi_idx = 0; fi_idx < info->n_functions; fi_idx++) { fi_ptr = info->functions[fi_idx]; /* Function record. */ pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION); - pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION_LENGTH); + pos += store_gcov_u32(buffer, pos, + GCOV_TAG_FUNCTION_LENGTH * GCOV_UNIT_SIZE); pos += store_gcov_u32(buffer, pos, fi_ptr->ident); pos += store_gcov_u32(buffer, pos, fi_ptr->lineno_checksum); pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum); @@ -470,7 +483,8 @@ static size_t convert_to_gcda(char *buffer, struct gcov_info *info) /* Counter record. 
*/ pos += store_gcov_u32(buffer, pos, GCOV_TAG_FOR_COUNTER(ct_idx)); - pos += store_gcov_u32(buffer, pos, ci_ptr->num * 2); + pos += store_gcov_u32(buffer, pos, + ci_ptr->num * 2 * GCOV_UNIT_SIZE); for (cv_idx = 0; cv_idx < ci_ptr->num; cv_idx++) { pos += store_gcov_u64(buffer, pos, From 243c8f42ba107216ab9c903e0470cc417672ae33 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Oct 2022 11:38:57 -0400 Subject: [PATCH 0389/1477] Revert "drm/amdgpu: make sure to init common IP before gmc" This reverts commit 7b0db849ea030a70b8fb9c9afec67c81f955482e which is commit a8671493d2074950553da3cf07d1be43185ef6c6 upstream. The patches that this patch depends on were not backported properly and the patch that caused the regression that this patch set fixed was reverted in commit 412b844143e3 ("Revert "PCI/portdrv: Don't disable AER reporting in get_port_device_capability()""). This isn't necessary and causes a regression so drop it. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2216 Cc: Shuah Khan Cc: Sasha Levin Signed-off-by: Alex Deucher Cc: # 5.10 Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 881045e600af..bde0496d2f15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2179,16 +2179,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } adev->ip_blocks[i].status.sw = true; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { - /* need to do common hw init early so everything is set up for gmc */ - r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); - if (r) { - DRM_ERROR("hw_init %d failed %d\n", i, r); - goto init_failed; - } - adev->ip_blocks[i].status.hw = true; - } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { - /* need to do 
gmc hw init early so we can allocate gpu mem */ + /* need to do gmc hw init early so we can allocate gpu mem */ + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { /* Try to reserve bad pages early */ if (amdgpu_sriov_vf(adev)) amdgpu_virt_exchange_data(adev); @@ -2770,8 +2762,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) int i, r; static enum amd_ip_block_type ip_order[] = { - AMD_IP_BLOCK_TYPE_COMMON, AMD_IP_BLOCK_TYPE_GMC, + AMD_IP_BLOCK_TYPE_COMMON, AMD_IP_BLOCK_TYPE_PSP, AMD_IP_BLOCK_TYPE_IH, }; From a10a57a224f32035b58ef68b069f5b7491dd13e2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 26 Oct 2022 13:25:57 +0200 Subject: [PATCH 0390/1477] Linux 5.10.150 Link: https://lore.kernel.org/r/20221024113022.510008560@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Jon Hunter Tested-by: Florian Fainelli Tested-by: Slade Watkins Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Tested-by: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2af6f1e68a09..5c7075d3b2f6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 149 +SUBLEVEL = 150 EXTRAVERSION = NAME = Dare mighty things From bbaea0f1cd33d702d053d5bdaf6d6dec3932894c Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 19 Oct 2022 10:56:00 +0200 Subject: [PATCH 0391/1477] bpf: Generate BTF_KIND_FLOAT when linking vmlinux commit db16c1fe92d7ba7d39061faef897842baee2c887 upstream. [backported for dependency only extra_paholeopt variable setup and usage, we don't want floats generated in 5.10] pahole v1.21 supports the --btf_gen_floats flag, which makes it generate the information about the floating-point types [1]. Adjust link-vmlinux.sh to pass this flag to pahole in case it's supported, which is determined using a simple version check. 
[1] https://lore.kernel.org/dwarves/YHRiXNX1JUF2Az0A@kernel.org/ Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210413190043.21918-1-iii@linux.ibm.com Signed-off-by: Jiri Olsa Signed-off-by: Greg Kroah-Hartman --- scripts/link-vmlinux.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index d0b44bee9286..cdfccbfed452 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -146,6 +146,7 @@ vmlinux_link() gen_btf() { local pahole_ver + local extra_paholeopt= if ! [ -x "$(command -v ${PAHOLE})" ]; then echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available" @@ -161,7 +162,7 @@ gen_btf() vmlinux_link ${1} info "BTF" ${2} - LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1} + LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${extra_paholeopt} ${1} # Create ${2} which contains just .BTF section but no symbols. Add # SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all From 06481cd9f7f692088bce03244f8cf132018f2fc6 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 19 Oct 2022 10:56:01 +0200 Subject: [PATCH 0392/1477] kbuild: Quote OBJCOPY var to avoid a pahole call break the build commit ff2e6efda0d5c51b33e2bcc0b0b981ac0a0ef214 upstream. [backported for dependency, skipped Makefile.modfinal change, because module BTF is not supported in 5.10] The ccache tool can be used to speed up cross-compilation, by calling the compiler and binutils through ccache. 
For example, following should work: $ export ARCH=arm64 CROSS_COMPILE="ccache aarch64-linux-gnu-" $ make M=drivers/gpu/drm/rockchip/ but pahole fails to extract the BTF info from DWARF, breaking the build: CC [M] drivers/gpu/drm/rockchip//rockchipdrm.mod.o LD [M] drivers/gpu/drm/rockchip//rockchipdrm.ko BTF [M] drivers/gpu/drm/rockchip//rockchipdrm.ko aarch64-linux-gnu-objcopy: invalid option -- 'J' Usage: aarch64-linux-gnu-objcopy [option(s)] in-file [out-file] Copies a binary file, possibly transforming it in the process ... make[1]: *** [scripts/Makefile.modpost:156: __modpost] Error 2 make: *** [Makefile:1866: modules] Error 2 this fails because OBJCOPY is set to "ccache aarch64-linux-gnu-copy" and later pahole is executed with the following command line: LLVM_OBJCOPY=$(OBJCOPY) $(PAHOLE) -J --btf_base vmlinux $@ which gets expanded to: LLVM_OBJCOPY=ccache aarch64-linux-gnu-objcopy pahole -J ... instead of: LLVM_OBJCOPY="ccache aarch64-linux-gnu-objcopy" pahole -J ... Fixes: 5f9ae91f7c0d ("kbuild: Build kernel module BTFs if BTF is enabled and pahole supports it") Signed-off-by: Javier Martinez Canillas Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/bpf/20210526215228.3729875-1-javierm@redhat.com Signed-off-by: Greg Kroah-Hartman --- scripts/link-vmlinux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index cdfccbfed452..72bf14df6903 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -162,7 +162,7 @@ gen_btf() vmlinux_link ${1} info "BTF" ${2} - LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${extra_paholeopt} ${1} + LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${extra_paholeopt} ${1} # Create ${2} which contains just .BTF section but no symbols. Add # SHF_ALLOC because .BTF will be part of the vmlinux image. 
--strip-all From f5f413cb3e8af235c5d310bea9942424fb242c2c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Oct 2022 10:56:02 +0200 Subject: [PATCH 0393/1477] kbuild: skip per-CPU BTF generation for pahole v1.18-v1.21 commit a0b8200d06ad6450c179407baa5f0f52f8cfcc97 upstream. [small context changes due to missing floats support in 5.10] Commit "mm/page_alloc: convert per-cpu list protection to local_lock" will introduce a zero-sized per-CPU variable, which causes pahole to generate invalid BTF. Only pahole versions 1.18 through 1.21 are impacted, as before 1.18 pahole doesn't know anything about per-CPU variables, and 1.22 contains the proper fix for the issue. Luckily, pahole 1.18 got the --skip_encoding_btf_vars option disabling BTF generation for per-CPU variables in anticipation of some unanticipated problems. So use this escape hatch to disable per-CPU var BTF info on those problematic pahole versions. Users relying on availability of per-CPU var BTFs would need to upgrade to pahole 1.22+, but everyone else won't notice any regressions.
Link: https://lkml.kernel.org/r/20210530002536.3193829-1-andrii@kernel.org Signed-off-by: Andrii Nakryiko Acked-by: Mel Gorman Cc: Arnaldo Carvalho de Melo Cc: Hao Luo Cc: Michal Suchanek Cc: Jiri Olsa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Jiri Olsa Signed-off-by: Greg Kroah-Hartman --- scripts/link-vmlinux.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 72bf14df6903..bbb22be4c8f1 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -161,6 +161,11 @@ gen_btf() vmlinux_link ${1} + if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then + # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars + extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars" + fi + info "BTF" ${2} LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${extra_paholeopt} ${1} From c5006abb80e276896ff7237300a6d447708c7924 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Oct 2022 10:56:03 +0200 Subject: [PATCH 0394/1477] kbuild: Unify options for BTF generation for vmlinux and modules commit 9741e07ece7c247dd65e1aa01e16b683f01c05a8 upstream. [skipped --btf_gen_floats option in pahole-flags.sh, skipped Makefile.modfinal change, because there's no BTF kmod support, squashing in 'exit 0' change from merge commit fc02cb2b37fe] Using new PAHOLE_FLAGS variable to pass extra arguments to pahole for both vmlinux and modules BTF data generation. Adding new scripts/pahole-flags.sh script that detect and prints pahole options. 
[ fixed issues found by kernel test robot ] Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211029125729.70002-1-jolsa@kernel.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 3 +++ scripts/link-vmlinux.sh | 8 +------- scripts/pahole-flags.sh | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 7 deletions(-) create mode 100755 scripts/pahole-flags.sh diff --git a/Makefile b/Makefile index 5c7075d3b2f6..0ecf8333cc17 100644 --- a/Makefile +++ b/Makefile @@ -465,6 +465,8 @@ LZ4 = lz4c XZ = xz ZSTD = zstd +PAHOLE_FLAGS = $(shell PAHOLE=$(PAHOLE) $(srctree)/scripts/pahole-flags.sh) + CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF) NOSTDINC_FLAGS := @@ -518,6 +520,7 @@ export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL +export PAHOLE_FLAGS # Files to ignore in find ... statements diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index bbb22be4c8f1..acd07a70a2f4 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -146,7 +146,6 @@ vmlinux_link() gen_btf() { local pahole_ver - local extra_paholeopt= if ! [ -x "$(command -v ${PAHOLE})" ]; then echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available" @@ -161,13 +160,8 @@ gen_btf() vmlinux_link ${1} - if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then - # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars - extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars" - fi - info "BTF" ${2} - LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${extra_paholeopt} ${1} + LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${PAHOLE_FLAGS} ${1} # Create ${2} which contains just .BTF section but no symbols. Add # SHF_ALLOC because .BTF will be part of the vmlinux image. 
--strip-all diff --git a/scripts/pahole-flags.sh b/scripts/pahole-flags.sh new file mode 100755 index 000000000000..27445cb72974 --- /dev/null +++ b/scripts/pahole-flags.sh @@ -0,0 +1,17 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +extra_paholeopt= + +if ! [ -x "$(command -v ${PAHOLE})" ]; then + exit 0 +fi + +pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') + +if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then + # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars + extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars" +fi + +echo ${extra_paholeopt} From ecad3312111798d84dac1ce6a853e0ac9de8d505 Mon Sep 17 00:00:00 2001 From: Martin Rodriguez Reboredo Date: Wed, 19 Oct 2022 10:56:04 +0200 Subject: [PATCH 0395/1477] kbuild: Add skip_encoding_btf_enum64 option to pahole New pahole (version 1.24) generates by default new BTF_KIND_ENUM64 BTF tag, which is not supported by stable kernel. As a result the kernel with CONFIG_DEBUG_INFO_BTF option will fail to compile with following error: BTFIDS vmlinux FAILED: load BTF from vmlinux: Invalid argument New pahole provides --skip_encoding_btf_enum64 option to skip BTF_KIND_ENUM64 generation and produce BTF supported by stable kernel. Adding this option to scripts/pahole-flags.sh. This change does not have equivalent commit in linus tree, because linus tree has support for BTF_KIND_ENUM64 tag, so it does not need to be disabled. 
Signed-off-by: Martin Rodriguez Reboredo Signed-off-by: Jiri Olsa Signed-off-by: Greg Kroah-Hartman --- scripts/pahole-flags.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/pahole-flags.sh b/scripts/pahole-flags.sh index 27445cb72974..8c82173e42e5 100755 --- a/scripts/pahole-flags.sh +++ b/scripts/pahole-flags.sh @@ -14,4 +14,8 @@ if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars" fi +if [ "${pahole_ver}" -ge "124" ]; then + extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_enum64" +fi + echo ${extra_paholeopt} From c34d1b22fef329d5cecd003d7be249937ec70a3c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 28 Oct 2022 13:02:36 +0200 Subject: [PATCH 0396/1477] Linux 5.10.151 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0ecf8333cc17..0e22d4c8bc79 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 150 +SUBLEVEL = 151 EXTRAVERSION = NAME = Dare mighty things From b838dcfda164cb3f8e2125b3e6bad6006843c67c Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 17 Oct 2022 21:02:27 +0800 Subject: [PATCH 0397/1477] ocfs2: clear dinode links count in case of error commit 28f4821b1b53e0649706912e810c6c232fc506f9 upstream. In ocfs2_mknod(), if error occurs after dinode successfully allocated, ocfs2 i_links_count will not be 0. So even though we clear inode i_nlink before iput in error handling, it still won't wipe inode since we'll refresh inode from dinode during inode lock. So just like clear inode i_nlink, we clear ocfs2 i_links_count as well. Also do the same change for ocfs2_symlink(). 
Link: https://lkml.kernel.org/r/20221017130227.234480-2-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Reported-by: Yan Wang Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/namei.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index c46bf7f581a1..db990df00bea 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -231,6 +231,7 @@ static int ocfs2_mknod(struct inode *dir, handle_t *handle = NULL; struct ocfs2_super *osb; struct ocfs2_dinode *dirfe; + struct ocfs2_dinode *fe = NULL; struct buffer_head *new_fe_bh = NULL; struct inode *inode = NULL; struct ocfs2_alloc_context *inode_ac = NULL; @@ -381,6 +382,7 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } + fe = (struct ocfs2_dinode *) new_fe_bh->b_data; if (S_ISDIR(mode)) { status = ocfs2_fill_new_dir(osb, handle, dir, inode, new_fe_bh, data_ac, meta_ac); @@ -453,8 +455,11 @@ roll_back: leave: if (status < 0 && did_quota_inode) dquot_free_inode(inode); - if (handle) + if (handle) { + if (status < 0 && fe) + ocfs2_set_links_count(fe, 0); ocfs2_commit_trans(osb, handle); + } ocfs2_inode_unlock(dir, 1); if (did_block_signals) @@ -2023,8 +2028,11 @@ bail: ocfs2_clusters_to_bytes(osb->sb, 1)); if (status < 0 && did_quota_inode) dquot_free_inode(inode); - if (handle) + if (handle) { + if (status < 0 && fe) + ocfs2_set_links_count(fe, 0); ocfs2_commit_trans(osb, handle); + } ocfs2_inode_unlock(dir, 1); if (did_block_signals) From 0d65f040fdbb812c952fd31a7001baf2becab9eb Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 17 Oct 2022 21:02:26 +0800 Subject: [PATCH 0398/1477] ocfs2: fix BUG when iput after ocfs2_mknod fails commit 759a7c6126eef5635506453e9b9d55a6a3ac2084 upstream. 
Commit b1529a41f777 "ocfs2: should reclaim the inode if '__ocfs2_mknod_locked' returns an error" tried to reclaim the claimed inode if __ocfs2_mknod_locked() fails later. But this introduces a race: the freed bit may be reused immediately by another thread, which will update dinode, e.g. i_generation. Then iput this inode will lead to BUG: inode->i_generation != le32_to_cpu(fe->i_generation) We could make this inode as bad, but we did want to do operations like wipe in some cases. Since the claimed inode bit can only affect that a dinode is missing and will return back after fsck, it seems not a big problem. So just leave it as is by reverting the reclaim logic. Link: https://lkml.kernel.org/r/20221017130227.234480-1-joseph.qi@linux.alibaba.com Fixes: b1529a41f777 ("ocfs2: should reclaim the inode if '__ocfs2_mknod_locked' returns an error") Signed-off-by: Joseph Qi Reported-by: Yan Wang Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/namei.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index db990df00bea..856474b0a1ae 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -636,18 +636,9 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, return status; } - status = __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, + return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, parent_fe_bh, handle, inode_ac, fe_blkno, suballoc_loc, suballoc_bit); - if (status < 0) { - u64 bg_blkno = ocfs2_which_suballoc_group(fe_blkno, suballoc_bit); - int tmp = ocfs2_free_suballoc_bits(handle, inode_ac->ac_inode, - inode_ac->ac_bh, suballoc_bit, bg_blkno, 1); - if (tmp) - mlog_errno(tmp); - } - - return status; } static int ocfs2_mkdir(struct inode *dir, From 2723875e9d677401d775a03a72abab7e9538c20c Mon Sep 17 00:00:00 2001 From: "GONG, Ruiqi" Date: Wed, 19 Oct 2022 10:57:10 +0800 Subject: [PATCH
0399/1477] selinux: enable use of both GFP_KERNEL and GFP_ATOMIC in convert_context() commit abe3c631447dcd1ba7af972fe6f054bee6f136fa upstream. The following warning was triggered on a hardware environment: SELinux: Converting 162 SID table entries... BUG: sleeping function called from invalid context at __might_sleep+0x60/0x74 0x0 in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 5943, name: tar CPU: 7 PID: 5943 Comm: tar Tainted: P O 5.10.0 #1 Call trace: dump_backtrace+0x0/0x1c8 show_stack+0x18/0x28 dump_stack+0xe8/0x15c ___might_sleep+0x168/0x17c __might_sleep+0x60/0x74 __kmalloc_track_caller+0xa0/0x7dc kstrdup+0x54/0xac convert_context+0x48/0x2e4 sidtab_context_to_sid+0x1c4/0x36c security_context_to_sid_core+0x168/0x238 security_context_to_sid_default+0x14/0x24 inode_doinit_use_xattr+0x164/0x1e4 inode_doinit_with_dentry+0x1c0/0x488 selinux_d_instantiate+0x20/0x34 security_d_instantiate+0x70/0xbc d_splice_alias+0x4c/0x3c0 ext4_lookup+0x1d8/0x200 [ext4] __lookup_slow+0x12c/0x1e4 walk_component+0x100/0x200 path_lookupat+0x88/0x118 filename_lookup+0x98/0x130 user_path_at_empty+0x48/0x60 vfs_statx+0x84/0x140 vfs_fstatat+0x20/0x30 __se_sys_newfstatat+0x30/0x74 __arm64_sys_newfstatat+0x1c/0x2c el0_svc_common.constprop.0+0x100/0x184 do_el0_svc+0x1c/0x2c el0_svc+0x20/0x34 el0_sync_handler+0x80/0x17c el0_sync+0x13c/0x140 SELinux: Context system_u:object_r:pssp_rsyslog_log_t:s0:c0 is not valid (left unmapped). It was found that within a critical section of spin_lock_irqsave in sidtab_context_to_sid(), convert_context() (hooked by sidtab_convert_params.func) might cause the process to sleep via allocating memory with GFP_KERNEL, which is problematic. As Ondrej pointed out [1], convert_context()/sidtab_convert_params.func has another caller sidtab_convert_tree(), which is okay with GFP_KERNEL. Therefore, fix this problem by adding a gfp_t argument for convert_context()/sidtab_convert_params.func and pass GFP_KERNEL/_ATOMIC properly in individual callers. 
Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20221018120111.1474581-1-gongruiqi1@huawei.com/ [1] Reported-by: Tan Ninghao Fixes: ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance") Signed-off-by: GONG, Ruiqi Reviewed-by: Ondrej Mosnacek [PM: wrap long BUG() output lines, tweak subject line] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/services.c | 5 +++-- security/selinux/ss/sidtab.c | 4 ++-- security/selinux/ss/sidtab.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 31d631fa846e..3db8bd2158d9 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2011,7 +2011,8 @@ static inline int convert_context_handle_invalid_context( * in `newc'. Verify that the context is valid * under the new policy. */ -static int convert_context(struct context *oldc, struct context *newc, void *p) +static int convert_context(struct context *oldc, struct context *newc, void *p, + gfp_t gfp_flags) { struct convert_context_args *args; struct ocontext *oc; @@ -2025,7 +2026,7 @@ static int convert_context(struct context *oldc, struct context *newc, void *p) args = p; if (oldc->str) { - s = kstrdup(oldc->str, GFP_KERNEL); + s = kstrdup(oldc->str, gfp_flags); if (!s) return -ENOMEM; diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index 656d50b09f76..1981c5af13e0 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -325,7 +325,7 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context, } rc = convert->func(context, &dst_convert->context, - convert->args); + convert->args, GFP_ATOMIC); if (rc) { context_destroy(&dst->context); goto out_unlock; @@ -404,7 +404,7 @@ static int sidtab_convert_tree(union sidtab_entry_inner *edst, while (i < SIDTAB_LEAF_ENTRIES && *pos < count) { rc = convert->func(&esrc->ptr_leaf->entries[i].context, 
&edst->ptr_leaf->entries[i].context, - convert->args); + convert->args, GFP_KERNEL); if (rc) return rc; (*pos)++; diff --git a/security/selinux/ss/sidtab.h b/security/selinux/ss/sidtab.h index 4eff0e49dcb2..9fce0d553fe2 100644 --- a/security/selinux/ss/sidtab.h +++ b/security/selinux/ss/sidtab.h @@ -65,7 +65,7 @@ struct sidtab_isid_entry { }; struct sidtab_convert_params { - int (*func)(struct context *oldc, struct context *newc, void *args); + int (*func)(struct context *oldc, struct context *newc, void *args, gfp_t gfp_flags); void *args; struct sidtab *target; }; From 794ded0bc461287a268bed21fea2eebb6e5d232c Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Sat, 15 Oct 2022 15:04:23 +0200 Subject: [PATCH 0400/1477] cpufreq: qcom: fix writes in read-only memory region commit 01039fb8e90c9cb684430414bff70cea9eb168c5 upstream. This commit fixes a kernel oops because of a write in some read-only memory: [ 9.068287] Unable to handle kernel write to read-only memory at virtual address ffff800009240ad8 ..snip.. [ 9.138790] Internal error: Oops: 9600004f [#1] PREEMPT SMP ..snip.. [ 9.269161] Call trace: [ 9.276271] __memcpy+0x5c/0x230 [ 9.278531] snprintf+0x58/0x80 [ 9.282002] qcom_cpufreq_msm8939_name_version+0xb4/0x190 [ 9.284869] qcom_cpufreq_probe+0xc8/0x39c ..snip.. The following line defines a pointer that point to a char buffer stored in read-only memory: char *pvs_name = "speedXX-pvsXX-vXX"; This pointer is meant to hold a template "speedXX-pvsXX-vXX" where the XX values get overridden by the qcom_cpufreq_krait_name_version function. 
Since the template is actually stored in read-only memory, when the function executes the following call we get an oops: snprintf(*pvs_name, sizeof("speedXX-pvsXX-vXX"), "speed%d-pvs%d-v%d", speed, pvs, pvs_ver); To fix this issue, we instead store the template name onto the stack by using the following syntax: char pvs_name_buffer[] = "speedXX-pvsXX-vXX"; Because the `pvs_name` needs to be able to be assigned to NULL, the template buffer is stored in the pvs_name_buffer and not under the pvs_name variable. Cc: v5.7+ # v5.7+ Fixes: a8811ec764f9 ("cpufreq: qcom: Add support for krait based socs") Signed-off-by: Fabien Parent Signed-off-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/qcom-cpufreq-nvmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 7fdd30e92e42..3af5c341d221 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -264,7 +264,8 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) struct nvmem_cell *speedbin_nvmem; struct device_node *np; struct device *cpu_dev; - char *pvs_name = "speedXX-pvsXX-vXX"; + char pvs_name_buffer[] = "speedXX-pvsXX-vXX"; + char *pvs_name = pvs_name_buffer; unsigned cpu; const struct of_device_id *match; int ret; From 6dcf1f0802cc36df55c94e7ce1edd83666a1d8f4 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Tue, 18 Oct 2022 03:19:20 +0100 Subject: [PATCH 0401/1477] i2c: qcom-cci: Fix ordering of pm_runtime_xx and i2c_add_adapter commit 61775d54d674ff8ec3658495e0dbc537227dc5c1 upstream. When we compile-in the CCI along with the imx412 driver and run on the RB5 we see that i2c_add_adapter() causes the probe of the imx412 driver to happen. This probe tries to perform an i2c xfer() and the xfer() in i2c-qcom-cci.c fails on pm_runtime_get() because the i2c-qcom-cci.c::probe() function has not completed to pm_runtime_enable(dev). 
Fix this sequence by ensuring pm_runtime_xxx() calls happen prior to adding the i2c adapter. Fixes: e517526195de ("i2c: Add Qualcomm CCI I2C driver") Reported-by: Vladimir Zapolskiy Reviewed-by: Vladimir Zapolskiy Tested-by: Vladimir Zapolskiy Cc: Signed-off-by: Bryan O'Donoghue Reviewed-by: Robert Foss Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-qcom-cci.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index 09e599069a81..06c87c79bae7 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -638,6 +638,11 @@ static int cci_probe(struct platform_device *pdev) if (ret < 0) goto error; + pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + for (i = 0; i < cci->data->num_masters; i++) { if (!cci->master[i].cci) continue; @@ -649,14 +654,12 @@ static int cci_probe(struct platform_device *pdev) } } - pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC); - pm_runtime_use_autosuspend(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - return 0; error_i2c: + pm_runtime_disable(dev); + pm_runtime_dont_use_autosuspend(dev); + for (--i ; i >= 0; i--) { if (cci->master[i].cci) { i2c_del_adapter(&cci->master[i].adap); From 0fb04676c4fd8b55bd0e0564aa49dcf90a7ac444 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 5 Oct 2022 12:00:08 +0200 Subject: [PATCH 0402/1477] x86/microcode/AMD: Apply the patch early on every logical thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e7ad18d1169c62e6c78c01ff693fd362d9d65278 upstream. Currently, the patch application logic checks whether the revision needs to be applied on each logical CPU (SMT thread). 
Therefore, on SMT designs where the microcode engine is shared between the two threads, the application happens only on one of them as that is enough to update the shared microcode engine. However, there are microcode patches which do per-thread modification, see Link tag below. Therefore, drop the revision check and try applying on each thread. This is what the BIOS does too so this method is very much tested. Btw, change only the early paths. On the late loading paths, there's no point in doing per-thread modification because if it is some case like in the bugzilla below - removing a CPUID flag - the kernel cannot go and un-use features it has detected are there early. For that, one should use early loading anyway. [ bp: Fixes does not contain the oldest commit which did check for equality but that is good enough. ] Fixes: 8801b3fcb574 ("x86/microcode/AMD: Rework container parsing") Reported-by: Ștefan Talpalaru Signed-off-by: Borislav Petkov Tested-by: Ștefan Talpalaru Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=216211 Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/amd.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index c87936441339..234a96f25248 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -441,7 +441,13 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_p return ret; native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - if (rev >= mc->hdr.patch_id) + + /* + * Allow application of the same revision to pick up SMT-specific + * changes even if the revision of the other SMT thread is already + * up-to-date.
+ */ + if (rev > mc->hdr.patch_id) return ret; if (!__apply_microcode_amd(mc)) { @@ -523,8 +529,12 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax) native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* Check whether we have saved a new patch already: */ - if (*new_rev && rev < mc->hdr.patch_id) { + /* + * Check whether a new patch has been saved already. Also, allow application of + * the same revision in order to pick up SMT-thread-specific configuration even + * if the sibling SMT thread already has an up-to-date revision. + */ + if (*new_rev && rev <= mc->hdr.patch_id) { if (!__apply_microcode_amd(mc)) { *new_rev = mc->hdr.patch_id; return; From d9f0159da05df869071164edf0c6d7302efc5eca Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 14 Oct 2022 17:01:45 +0800 Subject: [PATCH 0403/1477] hwmon/coretemp: Handle large core ID value commit 7108b80a542b9d65e44b36d64a700a83658c0b73 upstream. The coretemp driver supports up to a hard-coded limit of 128 cores. Today, the driver can not support a core with an ID above that limit. Yet, the encoding of core ID's is arbitrary (BIOS APIC-ID) and so they may be sparse and they may be large. Update the driver to map arbitrary core ID numbers into appropriate array indexes so that 128 cores can be supported, no matter the encoding of core ID's. 
Signed-off-by: Zhang Rui Signed-off-by: Dave Hansen Acked-by: Len Brown Acked-by: Guenter Roeck Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221014090147.1836-3-rui.zhang@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/coretemp.c | 56 +++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index bb9211215a68..032129292957 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -46,9 +46,6 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1) #define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO) -#define TO_CORE_ID(cpu) (cpu_data(cpu).cpu_core_id) -#define TO_ATTR_NO(cpu) (TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO) - #ifdef CONFIG_SMP #define for_each_sibling(i, cpu) \ for_each_cpu(i, topology_sibling_cpumask(cpu)) @@ -91,6 +88,8 @@ struct temp_data { struct platform_data { struct device *hwmon_dev; u16 pkg_id; + u16 cpu_map[NUM_REAL_CORES]; + struct ida ida; struct cpumask cpumask; struct temp_data *core_data[MAX_CORE_DATA]; struct device_attribute name_attr; @@ -441,7 +440,7 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag) MSR_IA32_THERM_STATUS; tdata->is_pkg_data = pkg_flag; tdata->cpu = cpu; - tdata->cpu_core_id = TO_CORE_ID(cpu); + tdata->cpu_core_id = topology_core_id(cpu); tdata->attr_size = MAX_CORE_ATTRS; mutex_init(&tdata->update_lock); return tdata; @@ -454,7 +453,7 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, struct platform_data *pdata = platform_get_drvdata(pdev); struct cpuinfo_x86 *c = &cpu_data(cpu); u32 eax, edx; - int err, attr_no; + int err, index, attr_no; /* * Find attr number for sysfs: @@ -462,14 +461,26 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, * The attr number is always core id + 2 * The Pkgtemp will always show up as temp1_*, if available */ - attr_no 
= pkg_flag ? PKG_SYSFS_ATTR_NO : TO_ATTR_NO(cpu); + if (pkg_flag) { + attr_no = PKG_SYSFS_ATTR_NO; + } else { + index = ida_alloc(&pdata->ida, GFP_KERNEL); + if (index < 0) + return index; + pdata->cpu_map[index] = topology_core_id(cpu); + attr_no = index + BASE_SYSFS_ATTR_NO; + } - if (attr_no > MAX_CORE_DATA - 1) - return -ERANGE; + if (attr_no > MAX_CORE_DATA - 1) { + err = -ERANGE; + goto ida_free; + } tdata = init_temp_data(cpu, pkg_flag); - if (!tdata) - return -ENOMEM; + if (!tdata) { + err = -ENOMEM; + goto ida_free; + } /* Test if we can access the status register */ err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx); @@ -505,6 +516,9 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, exit_free: pdata->core_data[attr_no] = NULL; kfree(tdata); +ida_free: + if (!pkg_flag) + ida_free(&pdata->ida, index); return err; } @@ -524,6 +538,9 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx) kfree(pdata->core_data[indx]); pdata->core_data[indx] = NULL; + + if (indx >= BASE_SYSFS_ATTR_NO) + ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO); } static int coretemp_probe(struct platform_device *pdev) @@ -537,6 +554,7 @@ static int coretemp_probe(struct platform_device *pdev) return -ENOMEM; pdata->pkg_id = pdev->id; + ida_init(&pdata->ida); platform_set_drvdata(pdev, pdata); pdata->hwmon_dev = devm_hwmon_device_register_with_groups(dev, DRVNAME, @@ -553,6 +571,7 @@ static int coretemp_remove(struct platform_device *pdev) if (pdata->core_data[i]) coretemp_remove_core(pdata, i); + ida_destroy(&pdata->ida); return 0; } @@ -647,7 +666,7 @@ static int coretemp_cpu_offline(unsigned int cpu) struct platform_device *pdev = coretemp_get_pdev(cpu); struct platform_data *pd; struct temp_data *tdata; - int indx, target; + int i, indx = -1, target; /* * Don't execute this on suspend as the device remove locks @@ -660,12 +679,19 @@ static int coretemp_cpu_offline(unsigned int cpu) if (!pdev) return 0; - /* The core id is too 
big, just return */ - indx = TO_ATTR_NO(cpu); - if (indx > MAX_CORE_DATA - 1) + pd = platform_get_drvdata(pdev); + + for (i = 0; i < NUM_REAL_CORES; i++) { + if (pd->cpu_map[i] == topology_core_id(cpu)) { + indx = i + BASE_SYSFS_ATTR_NO; + break; + } + } + + /* Too many cores and this core is not populated, just return */ + if (indx < 0) return 0; - pd = platform_get_drvdata(pdev); tdata = pd->core_data[indx]; cpumask_clear_cpu(cpu, &pd->cpumask); From 6a2aadcb0186c383e9e24450b80269a80ac2bd3f Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 12 Oct 2022 15:11:05 +0200 Subject: [PATCH 0404/1477] ata: ahci-imx: Fix MODULE_ALIAS commit 979556f1521a835a059de3b117b9c6c6642c7d58 upstream. 'ahci:' is an invalid prefix, preventing the module from autoloading. Fix this by using the 'platform:' prefix and DRV_NAME. Fixes: 9e54eae23bc9 ("ahci_imx: add ahci sata support on imx platforms") Cc: stable@vger.kernel.org Signed-off-by: Alexander Stein Reviewed-by: Fabio Estevam Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci_imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c index 388baf528fa8..189f75d53741 100644 --- a/drivers/ata/ahci_imx.c +++ b/drivers/ata/ahci_imx.c @@ -1230,4 +1230,4 @@ module_platform_driver(imx_ahci_driver); MODULE_DESCRIPTION("Freescale i.MX AHCI SATA platform driver"); MODULE_AUTHOR("Richard Zhu "); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ahci:imx"); +MODULE_ALIAS("platform:" DRV_NAME); From 303d0f761431d848dd8d7ff9fd9b8c101879cabe Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 11 Oct 2022 10:46:17 +0800 Subject: [PATCH 0405/1477] ata: ahci: Match EM_MAX_SLOTS with SATA_PMP_MAX_PORTS commit 1e41e693f458eef2d5728207dbd327cd3b16580a upstream. 
UBSAN complains about array-index-out-of-bounds: [ 1.980703] kernel: UBSAN: array-index-out-of-bounds in /build/linux-9H675w/linux-5.15.0/drivers/ata/libahci.c:968:41 [ 1.980709] kernel: index 15 is out of range for type 'ahci_em_priv [8]' [ 1.980713] kernel: CPU: 0 PID: 209 Comm: scsi_eh_8 Not tainted 5.15.0-25-generic #25-Ubuntu [ 1.980716] kernel: Hardware name: System manufacturer System Product Name/P5Q3, BIOS 1102 06/11/2010 [ 1.980718] kernel: Call Trace: [ 1.980721] kernel: [ 1.980723] kernel: show_stack+0x52/0x58 [ 1.980729] kernel: dump_stack_lvl+0x4a/0x5f [ 1.980734] kernel: dump_stack+0x10/0x12 [ 1.980736] kernel: ubsan_epilogue+0x9/0x45 [ 1.980739] kernel: __ubsan_handle_out_of_bounds.cold+0x44/0x49 [ 1.980742] kernel: ahci_qc_issue+0x166/0x170 [libahci] [ 1.980748] kernel: ata_qc_issue+0x135/0x240 [ 1.980752] kernel: ata_exec_internal_sg+0x2c4/0x580 [ 1.980754] kernel: ? vprintk_default+0x1d/0x20 [ 1.980759] kernel: ata_exec_internal+0x67/0xa0 [ 1.980762] kernel: sata_pmp_read+0x8d/0xc0 [ 1.980765] kernel: sata_pmp_read_gscr+0x3c/0x90 [ 1.980768] kernel: sata_pmp_attach+0x8b/0x310 [ 1.980771] kernel: ata_eh_revalidate_and_attach+0x28c/0x4b0 [ 1.980775] kernel: ata_eh_recover+0x6b6/0xb30 [ 1.980778] kernel: ? ahci_do_hardreset+0x180/0x180 [libahci] [ 1.980783] kernel: ? ahci_stop_engine+0xb0/0xb0 [libahci] [ 1.980787] kernel: ? ahci_do_softreset+0x290/0x290 [libahci] [ 1.980792] kernel: ? trace_event_raw_event_ata_eh_link_autopsy_qc+0xe0/0xe0 [ 1.980795] kernel: sata_pmp_eh_recover.isra.0+0x214/0x560 [ 1.980799] kernel: sata_pmp_error_handler+0x23/0x40 [ 1.980802] kernel: ahci_error_handler+0x43/0x80 [libahci] [ 1.980806] kernel: ata_scsi_port_error_handler+0x2b1/0x600 [ 1.980810] kernel: ata_scsi_error+0x9c/0xd0 [ 1.980813] kernel: scsi_error_handler+0xa1/0x180 [ 1.980817] kernel: ? scsi_unjam_host+0x1c0/0x1c0 [ 1.980820] kernel: kthread+0x12a/0x150 [ 1.980823] kernel: ? 
set_kthread_struct+0x50/0x50 [ 1.980826] kernel: ret_from_fork+0x22/0x30 [ 1.980831] kernel: This happens because sata_pmp_init_links() initializes link->pmp up to SATA_PMP_MAX_PORTS while em_priv is declared as an 8-element array. I can't find the maximum Enclosure Management ports specified in AHCI spec v1.3.1, but "12.2.1 LED message type" states that "Port Multiplier Information" can utilize 4 bits, which implies it can support up to 16 ports. Hence, use SATA_PMP_MAX_PORTS as EM_MAX_SLOTS to resolve the issue. BugLink: https://bugs.launchpad.net/bugs/1970074 Cc: stable@vger.kernel.org Signed-off-by: Kai-Heng Feng Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index d1f284f0c83d..1ce897356993 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -254,7 +254,7 @@ enum { PCS_7 = 0x94, /* 7+ port PCS (Denverton) */ /* em constants */ - EM_MAX_SLOTS = 8, + EM_MAX_SLOTS = SATA_PMP_MAX_PORTS, EM_MAX_RETRY = 5, /* em_ctl bits */ From e55feb31df3fc78b880d6e9d4b5853f05c974833 Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Sat, 15 Oct 2022 15:04:22 +0200 Subject: [PATCH 0406/1477] cpufreq: qcom: fix memory leak in error path commit 9f42cf54403a42cb092636804d2628d8ecf71e75 upstream. If for some reason the speedbin length is incorrect, then there is a memory leak in the error path because we never free the speedbin buffer. This commit fixes the error path to always free the speedbin buffer.
Cc: v5.7+ # v5.7+ Fixes: a8811ec764f9 ("cpufreq: qcom: Add support for krait based socs") Signed-off-by: Fabien Parent Signed-off-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/qcom-cpufreq-nvmem.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 3af5c341d221..9b3d24721d7b 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -215,6 +215,7 @@ static int qcom_cpufreq_krait_name_version(struct device *cpu_dev, int speed = 0, pvs = 0, pvs_ver = 0; u8 *speedbin; size_t len; + int ret = 0; speedbin = nvmem_cell_read(speedbin_nvmem, &len); @@ -232,7 +233,8 @@ static int qcom_cpufreq_krait_name_version(struct device *cpu_dev, break; default: dev_err(cpu_dev, "Unable to read nvmem data. Defaulting to 0!\n"); - return -ENODEV; + ret = -ENODEV; + goto len_error; } snprintf(*pvs_name, sizeof("speedXX-pvsXX-vXX"), "speed%d-pvs%d-v%d", @@ -240,8 +242,9 @@ static int qcom_cpufreq_krait_name_version(struct device *cpu_dev, drv->versions = (1 << speed); +len_error: kfree(speedbin); - return 0; + return ret; } static const struct qcom_cpufreq_match_data match_data_kryo = { From 34db701dc65f266b40f38273761ab4f05703f498 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 17 Oct 2022 20:45:39 +0200 Subject: [PATCH 0407/1477] kvm: Add support for arch compat vm ioctls commit ed51862f2f57cbce6fed2d4278cfe70a490899fd upstream. We will introduce the first architecture specific compat vm ioctl in the next patch. Add all necessary boilerplate to allow architectures to override compat vm ioctls when necessary. 
Signed-off-by: Alexander Graf Message-Id: <20221017184541.2658-2-graf@amazon.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 896e563e2c18..9cb0a3d7874f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -911,6 +911,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); +long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg); int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c4cce817a452..564d5c145fbe 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3966,6 +3966,12 @@ struct compat_kvm_clear_dirty_log { }; }; +long __weak kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + return -ENOTTY; +} + static long kvm_vm_compat_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3974,6 +3980,11 @@ static long kvm_vm_compat_ioctl(struct file *filp, if (kvm->mm != current->mm || kvm->vm_bugged) return -EIO; + + r = kvm_arch_vm_compat_ioctl(filp, ioctl, arg); + if (r != -ENOTTY) + return r; + switch (ioctl) { #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT case KVM_CLEAR_DIRTY_LOG: { From 6d725672ce853f0d137231698ddddcdef607dff3 Mon Sep 17 00:00:00 2001 From: Eric Ren Date: Sat, 15 Oct 2022 11:19:28 +0800 Subject: [PATCH 0408/1477] KVM: arm64: vgic: Fix exit condition in scan_its_table() commit c000a2607145d28b06c697f968491372ea56c23a upstream. With some PCIe topologies, restoring a guest fails while parsing the ITS device tables. Reproducer hints: 1. 
Create ARM virt VM with pxb-pcie bus which adds extra host bridges, with qemu command like: ``` -device pxb-pcie,bus_nr=8,id=pci.x,numa_node=0,bus=pcie.0 \ -device pcie-root-port,..,bus=pci.x \ ... -device pxb-pcie,bus_nr=37,id=pci.y,numa_node=1,bus=pcie.0 \ -device pcie-root-port,..,bus=pci.y \ ... ``` 2. Ensure the guest uses 2-level device table 3. Perform VM migration which calls save/restore device tables In that setup, we get a big "offset" between 2 device_ids, which makes unsigned "len" wrap around to a big positive number, causing the scan loop to continue with a bad GPA. For example: 1. L1 table has 2 entries; 2. and we are now scanning at L2 table entry index 2075 (pointed to by L1 first entry) 3. if next device id is 9472, we will get a big offset: 7397; 4. with unsigned 'len', 'len -= offset * esz', len will underflow to a positive number, mistakenly entering the next iteration with a bad GPA; (It should break out of the current L2 table scanning, and jump into the next L1 table entry) 5. that bad GPA fails the guest read. Fix it by stopping the L2 table scan when the next device id is outside of the current table, allowing the scan to continue from the next L1 table entry. Thanks to Eric Auger for the fix suggestion.
Fixes: 920a7a8fa92a ("KVM: arm64: vgic-its: Add infrastructure for tableookup") Suggested-by: Eric Auger Signed-off-by: Eric Ren [maz: commit message tidy-up] Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/d9c3a564af9e2c5bf63f48a7dcbf08cd593c5c0b.1665802985.git.renzhengeek@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/vgic/vgic-its.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index b9518f94bd43..23710bf5a86b 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2096,7 +2096,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, memset(entry, 0, esz); - while (len > 0) { + while (true) { int next_offset; size_t byte_offset; @@ -2109,6 +2109,9 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, return next_offset; byte_offset = next_offset * esz; + if (byte_offset >= len) + break; + id += next_offset; gpa += byte_offset; len -= byte_offset; From bce5808fc95dbc8604e55a311881f04c5a9a37d6 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Fri, 2 Sep 2022 12:32:21 +0200 Subject: [PATCH 0409/1477] media: mceusb: set timeout to at least timeout provided commit 20b794ddce475ed012deb365000527c17b3e93e6 upstream. By rounding down, the actual timeout can be lower than requested. As a result, long spaces just below the requested timeout can be incorrectly reported as timeout and truncated. 
Fixes: 877f1a7cee3f ("media: rc: mceusb: allow the timeout to be configurable") Cc: stable@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/mceusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c index de4cf6eb5258..142319c48405 100644 --- a/drivers/media/rc/mceusb.c +++ b/drivers/media/rc/mceusb.c @@ -1077,7 +1077,7 @@ static int mceusb_set_timeout(struct rc_dev *dev, unsigned int timeout) struct mceusb_dev *ir = dev->priv; unsigned int units; - units = DIV_ROUND_CLOSEST(timeout, MCE_TIME_UNIT); + units = DIV_ROUND_UP(timeout, MCE_TIME_UNIT); cmdbuf[2] = units >> 8; cmdbuf[3] = units; From ab6aaa821024978b203b3e4cc431ba61ea495c21 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Tue, 26 Jul 2022 04:14:54 +0200 Subject: [PATCH 0410/1477] media: venus: dec: Handle the case where find_format fails commit 06a2da340f762addc5935bf851d95b14d4692db2 upstream. Debugging the decoder on msm8916 I noticed the vdec probe was crashing if the fmt pointer was NULL. A similar fix from Colin Ian King found by Coverity was implemented for the encoder. Implement the same fix on the decoder. 
Fixes: 7472c1c69138 ("[media] media: venus: vdec: add video decoder files") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Bryan O'Donoghue Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/venus/vdec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c index ea13170a6a2c..de34a87d1130 100644 --- a/drivers/media/platform/qcom/venus/vdec.c +++ b/drivers/media/platform/qcom/venus/vdec.c @@ -158,6 +158,8 @@ vdec_try_fmt_common(struct venus_inst *inst, struct v4l2_format *f) else return NULL; fmt = find_format(inst, pixmp->pixelformat, f->type); + if (!fmt) + return NULL; } pixmp->width = clamp(pixmp->width, frame_width_min(inst), From 392536023da18086d57565e716ed50193869b8e7 Mon Sep 17 00:00:00 2001 From: Lei Chen Date: Mon, 30 Nov 2020 10:20:52 +0800 Subject: [PATCH 0411/1477] block: wbt: Remove unnecessary invoking of wbt_update_limits in wbt_init commit 5a20d073ec54a72d9a732fa44bfe14954eb6332f upstream. It's unnecessary to call wbt_update_limits explicitly within wbt_init, because it will be called in the following function wbt_queue_depth_changed. Signed-off-by: Lei Chen Signed-off-by: Jens Axboe Signed-off-by: Yu Kuai Signed-off-by: Greg Kroah-Hartman --- block/blk-wbt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 35d81b5deae1..4ec0a018a2ad 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -840,7 +840,6 @@ int wbt_init(struct request_queue *q) rwb->enable_state = WBT_STATE_ON_DEFAULT; rwb->wc = 1; rwb->rq_depth.default_depth = RWB_DEF_DEPTH; - wbt_update_limits(rwb); /* * Assign rwb and add the stats callback. 
From 910ba49b33450a878128adc7d9c419dd97efd923 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 13 Sep 2022 18:57:49 +0800 Subject: [PATCH 0412/1477] blk-wbt: call rq_qos_add() after wb_normal is initialized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8c5035dfbb9475b67c82b3fdb7351236525bf52b upstream. Our test found a problem where the wbt inflight counter goes negative, which will cause an io hang (note that this problem doesn't exist in mainline): t1: device create t2: issue io add_disk blk_register_queue wbt_enable_default wbt_init rq_qos_add // wb_normal is still 0 /* * in mainline, disk can't be opened before * bdev_add(), however, in old kernels, disk * can be opened before blk_register_queue(). */ blkdev_issue_flush // disk size is 0, however, it's not checked submit_bio_wait submit_bio blk_mq_submit_bio rq_qos_throttle wbt_wait bio_to_wbt_flags rwb_enabled // wb_normal is 0, inflight is not increased wbt_queue_depth_changed(&rwb->rqos); wbt_update_limits // wb_normal is initialized rq_qos_track wbt_track rq->wbt_flags |= bio_to_wbt_flags(rwb, bio); // wb_normal is not 0,wbt_flags will be set t3: io completion blk_mq_free_request rq_qos_done wbt_done wbt_is_tracked // return true __wbt_done wbt_rqw_done atomic_dec_return(&rqw->inflight); // inflight is decreased commit 8235b5c1e8c1 ("block: call bdev_add later in device_add_disk") can avoid this problem, however it's better to fix this problem in wbt: 1) Older kernels can't backport that commit due to the large amount of refactoring involved. 2) The root cause is that wbt calls rq_qos_add() before wb_normal is initialized. 
Fixes: e34cbd307477 ("blk-wbt: add general throttling mechanism") Cc: Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20220913105749.3086243-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-wbt.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 4ec0a018a2ad..bafdb8098893 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -840,6 +840,10 @@ int wbt_init(struct request_queue *q) rwb->enable_state = WBT_STATE_ON_DEFAULT; rwb->wc = 1; rwb->rq_depth.default_depth = RWB_DEF_DEPTH; + rwb->min_lat_nsec = wbt_default_latency_nsec(q); + + wbt_queue_depth_changed(&rwb->rqos); + wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); /* * Assign rwb and add the stats callback. @@ -847,10 +851,5 @@ int wbt_init(struct request_queue *q) rq_qos_add(q, &rwb->rqos); blk_stat_add_callback(q, rwb->cb); - rwb->min_lat_nsec = wbt_default_latency_nsec(q); - - wbt_queue_depth_changed(&rwb->rqos); - wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); - return 0; } From 51b96ecaedc0a12f6827f189a94f59012dde8208 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 14 Jul 2022 17:15:23 +0100 Subject: [PATCH 0413/1477] arm64: errata: Remove AES hwcap for COMPAT tasks commit 44b3834b2eed595af07021b1c64e6f9bc396398b upstream. Cortex-A57 and Cortex-A72 have an erratum where an interrupt that occurs between a pair of AES instructions in aarch32 mode may corrupt the ELR. The task will subsequently produce the wrong AES result. The AES instructions are part of the cryptographic extensions, which are optional. User-space software will detect the support for these instructions from the hwcaps. If the platform doesn't support these instructions a software implementation should be used. Remove the hwcap bits on affected parts to indicate user-space should not use the AES instructions. 
Acked-by: Ard Biesheuvel Signed-off-by: James Morse Link: https://lore.kernel.org/r/20220714161523.279570-3-james.morse@arm.com Signed-off-by: Will Deacon [florian: removed arch/arm64/tools/cpucaps and fixup cpufeature.c] Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/Kconfig | 16 ++++++++++++++++ arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpu_errata.c | 16 ++++++++++++++++ arch/arm64/kernel/cpufeature.c | 13 ++++++++++++- 5 files changed, 50 insertions(+), 2 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 22a07c208fee..4f3206495217 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -76,10 +76,14 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A57 | #1319537 | ARM64_ERRATUM_1319367 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A57 | #1742098 | ARM64_ERRATUM_1742098 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A72 | #853709 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A72 | #1319367 | ARM64_ERRATUM_1319367 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A72 | #1655431 | ARM64_ERRATUM_1742098 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1188873,1418040| ARM64_ERRATUM_1418040 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index af65ab83e63d..34bd4cba81e6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -481,6 +481,22 @@ config 
ARM64_ERRATUM_834220 If unsure, say Y. +config ARM64_ERRATUM_1742098 + bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence" + depends on COMPAT + default y + help + This option removes the AES hwcap for aarch32 user-space to + workaround erratum 1742098 on Cortex-A57 and Cortex-A72. + + Affected parts may corrupt the AES state if an interrupt is + taken between a pair of AES instructions. These instructions + are only present if the cryptography extensions are present. + All software should have a fallback implementation for CPUs + that don't implement the cryptography extensions. + + If unsure, say Y. + config ARM64_ERRATUM_845719 bool "Cortex-A53: 845719: a load might read incorrect data" depends on COMPAT diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 53030d3c03a2..d2080a41f6e6 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -68,7 +68,8 @@ #define ARM64_WORKAROUND_1508412 58 #define ARM64_SPECTRE_BHB 59 #define ARM64_WORKAROUND_2457168 60 +#define ARM64_WORKAROUND_1742098 61 -#define ARM64_NCAPS 61 +#define ARM64_NCAPS 62 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index aaacca6fd52f..5d6f19bc628c 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -356,6 +356,14 @@ static const struct midr_range erratum_1463225[] = { }; #endif +#ifdef CONFIG_ARM64_ERRATUM_1742098 +static struct midr_range broken_aarch32_aes[] = { + MIDR_RANGE(MIDR_CORTEX_A57, 0, 1, 0xf, 0xf), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -554,6 +562,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A510 r0p0-r1p1 */ CAP_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1) }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1742098 + { + .desc 
= "ARM erratum 1742098", + .capability = ARM64_WORKAROUND_1742098, + CAP_MIDR_RANGE_LIST(broken_aarch32_aes), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + }, #endif { } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e72c90b82656..f3767c144593 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include @@ -1730,6 +1731,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_MTE */ +static void elf_hwcap_fixup(void) +{ +#ifdef CONFIG_ARM64_ERRATUM_1742098 + if (cpus_have_const_cap(ARM64_WORKAROUND_1742098)) + compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES; +#endif /* ARM64_ERRATUM_1742098 */ +} + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2735,8 +2744,10 @@ void __init setup_cpu_features(void) setup_system_capabilities(); setup_elf_hwcaps(arm64_elf_hwcaps); - if (system_supports_32bit_el0()) + if (system_supports_32bit_el0()) { setup_elf_hwcaps(compat_elf_hwcaps); + elf_hwcap_fixup(); + } if (system_uses_ttbr0_pan()) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); From cc841a8a704c1de58491b8f158d661c065cfb831 Mon Sep 17 00:00:00 2001 From: Jean-Francois Le Fillatre Date: Wed, 24 Aug 2022 21:14:36 +0200 Subject: [PATCH 0414/1477] r8152: add PID for the Lenovo OneLink+ Dock commit 1bd3a383075c64d638e65d263c9267b08ee7733c upstream. The Lenovo OneLink+ Dock contains an RTL8153 controller that behaves as a broken CDC device by default. Add the custom Lenovo PID to the r8152 driver to support it properly. Also, systems compatible with this dock provide a BIOS option to enable MAC address passthrough (as per Lenovo document "ThinkPad Docking Solutions 2017"). Add the custom PID to the MAC passthrough list too. 
Tested on a ThinkPad 13 1st gen with the expected results: passthrough disabled: Invalid header when reading pass-thru MAC addr passthrough enabled: Using pass-thru MAC addr XX:XX:XX:XX:XX:XX Signed-off-by: Jean-Francois Le Fillatre Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_ether.c | 7 +++++++ drivers/net/usb/r8152.c | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 43ddbe61dc58..935cd296887f 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -763,6 +763,13 @@ static const struct usb_device_id products[] = { }, #endif +/* Lenovo ThinkPad OneLink+ Dock (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3054, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* ThinkPad USB-C Dock (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3062, USB_CLASS_COMM, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index a526242a3e36..f9a79d67d6d4 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6870,6 +6870,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927)}, {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3054)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3069)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3082)}, From 590929ef6972ee33e20ab439979de6d3208a3e2e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:16:51 +0100 Subject: [PATCH 0415/1477] btrfs: fix processing of delayed data refs during backref walking [ Upstream commit 4fc7b57228243d09c0d878873bf24fa64a90fa01 ] When processing delayed data references during backref walking and we are using a share context (we are being called through 
fiemap), whenever we find a delayed data reference for an inode different from the one we are interested in, then we immediately exit and consider the data extent as shared. This is wrong, because: 1) This might be a DROP reference that will cancel out a reference in the extent tree; 2) Even if it's an ADD reference, it may be followed by a DROP reference that cancels it out. In either case we should not exit immediately. Fix this by never exiting when we find a delayed data reference for another inode - instead add the reference and if it does not cancel out another delayed reference, we will exit early when we call extent_is_shared() after processing all delayed references. If we find a drop reference, then signal the code that processes references from the extent tree (add_inline_refs() and add_keyed_refs()) to not exit immediately if it finds there a reference for another inode, since we have delayed drop references that may cancel it out. In this latter case we exit once we don't have references in the rb trees that cancel out each other and have two references for different inodes. 
Example reproducer for case 1): $ cat test-1.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj mkfs.btrfs -f $DEV mount $DEV $MNT xfs_io -f -c "pwrite 0 64K" $MNT/foo cp --reflink=always $MNT/foo $MNT/bar echo echo "fiemap after cloning:" xfs_io -c "fiemap -v" $MNT/foo rm -f $MNT/bar echo echo "fiemap after removing file bar:" xfs_io -c "fiemap -v" $MNT/foo umount $MNT Running it before this patch, the extent is still listed as shared, it has the flag 0x2000 (FIEMAP_EXTENT_SHARED) set: $ ./test-1.sh fiemap after cloning: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 fiemap after removing file bar: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 Example reproducer for case 2): $ cat test-2.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj mkfs.btrfs -f $DEV mount $DEV $MNT xfs_io -f -c "pwrite 0 64K" $MNT/foo cp --reflink=always $MNT/foo $MNT/bar # Flush delayed references to the extent tree and commit current # transaction. 
sync echo echo "fiemap after cloning:" xfs_io -c "fiemap -v" $MNT/foo rm -f $MNT/bar echo echo "fiemap after removing file bar:" xfs_io -c "fiemap -v" $MNT/foo umount $MNT Running it before this patch, the extent is still listed as shared, it has the flag 0x2000 (FIEMAP_EXTENT_SHARED) set: $ ./test-2.sh fiemap after cloning: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 fiemap after removing file bar: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 After this patch, after deleting bar in both tests, the extent is not reported with the 0x2000 flag anymore, it gets only the flag 0x1 (which is FIEMAP_EXTENT_LAST): $ ./test-1.sh fiemap after cloning: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 fiemap after removing file bar: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x1 $ ./test-2.sh fiemap after cloning: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 fiemap after removing file bar: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x1 These tests will later be converted to a test case for fstests. 
Fixes: dc046b10c8b7d4 ("Btrfs: make fiemap not blow when you have lots of snapshots") Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/backref.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index baff31a147e7..7e8fac12f3f8 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -137,6 +137,7 @@ struct share_check { u64 root_objectid; u64 inum; int share_count; + bool have_delayed_delete_refs; }; static inline int extent_is_shared(struct share_check *sc) @@ -881,13 +882,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, key.offset = ref->offset; /* - * Found a inum that doesn't match our known inum, we - * know it's shared. + * If we have a share check context and a reference for + * another inode, we can't exit immediately. This is + * because even if this is a BTRFS_ADD_DELAYED_REF + * reference we may find next a BTRFS_DROP_DELAYED_REF + * which cancels out this ADD reference. + * + * If this is a DROP reference and there was no previous + * ADD reference, then we need to signal that when we + * process references from the extent tree (through + * add_inline_refs() and add_keyed_refs()), we should + * not exit early if we find a reference for another + * inode, because one of the delayed DROP references + * may cancel that reference in the extent tree. 
*/ - if (sc && sc->inum && ref->objectid != sc->inum) { - ret = BACKREF_FOUND_SHARED; - goto out; - } + if (sc && count < 0) + sc->have_delayed_delete_refs = true; ret = add_indirect_ref(fs_info, preftrees, ref->root, &key, 0, node->bytenr, count, sc, @@ -917,7 +927,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, } if (!ret) ret = extent_is_shared(sc); -out: + spin_unlock(&head->lock); return ret; } @@ -1020,7 +1030,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1030,6 +1041,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, ret = add_indirect_ref(fs_info, preftrees, root, &key, 0, bytenr, count, sc, GFP_NOFS); + break; } default: @@ -1119,7 +1131,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1542,6 +1555,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, .root_objectid = root->root_key.objectid, .inum = inum, .share_count = 0, + .have_delayed_delete_refs = false, }; ulist_init(roots); @@ -1576,6 +1590,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, break; bytenr = node->val; shared.share_count = 0; + shared.have_delayed_delete_refs = false; cond_resched(); } From 57e157749ad9bce8e44db8dd951e3a363061ed42 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:16:52 +0100 Subject: [PATCH 0416/1477] btrfs: fix processing of delayed tree block refs during backref walking [ Upstream commit 
943553ef9b51db303ab2b955c1025261abfdf6fb ] During backref walking, when processing a delayed reference with a type of BTRFS_TREE_BLOCK_REF_KEY, we have two bugs there: 1) We are accessing the delayed references extent_op, and its key, without the protection of the delayed ref head's lock; 2) If there's no extent op for the delayed ref head, we end up with an uninitialized key in the stack, variable 'tmp_op_key', and then pass it to add_indirect_ref(), which adds the reference to the indirect refs rb tree. This is wrong, because indirect references should have a NULL key when we don't have access to the key, and in that case they should be added to the indirect_missing_keys rb tree and not to the indirect rb tree. This means that if we have a BTRFS_TREE_BLOCK_REF_KEY delayed ref resulting from freeing an extent buffer, therefore with a count of -1, it will not cancel out the corresponding reference we have in the extent tree (with a count of 1), since both references end up in different rb trees. When using fiemap, where we often need to check if extents are shared through shared subtrees resulting from snapshots, it means we can incorrectly report an extent as shared when it's no longer shared. However this is temporary because after the transaction is committed the extent is no longer reported as shared, as running the delayed reference results in deleting the tree block reference from the extent tree. Outside the fiemap context, the result is unpredictable, as the key was not initialized but it's used when navigating the rb trees to insert and search for references (prelim_ref_compare()), and we expect all references in the indirect rb tree to have valid keys. The following reproducer triggers the second bug: $ cat test.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj mkfs.btrfs -f $DEV mount -o compress $DEV $MNT # With a compressed 128M file we get a tree height of 2 (level 1 root). 
xfs_io -f -c "pwrite -b 1M 0 128M" $MNT/foo btrfs subvolume snapshot $MNT $MNT/snap # Fiemap should output 0x2008 in the flags column. # 0x2000 means shared extent # 0x8 means encoded extent (because it's compressed) echo echo "fiemap after snapshot, range [120M, 120M + 128K):" xfs_io -c "fiemap -v 120M 128K" $MNT/foo echo # Overwrite one extent and fsync to flush delalloc and COW a new path # in the snapshot's tree. # # After this we have a BTRFS_DROP_DELAYED_REF delayed ref of type # BTRFS_TREE_BLOCK_REF_KEY with a count of -1 for every COWed extent # buffer in the path. # # In the extent tree we have inline references of type # BTRFS_TREE_BLOCK_REF_KEY, with a count of 1, for the same extent # buffers, so they should cancel each other, and the extent buffers in # the fs tree should no longer be considered as shared. # echo "Overwriting file range [120M, 120M + 128K)..." xfs_io -c "pwrite -b 128K 120M 128K" $MNT/snap/foo xfs_io -c "fsync" $MNT/snap/foo # Fiemap should output 0x8 in the flags column. The extent in the range # [120M, 120M + 128K) is no longer shared, it's now exclusive to the fs # tree. echo echo "fiemap after overwrite range [120M, 120M + 128K):" xfs_io -c "fiemap -v 120M 128K" $MNT/foo echo umount $MNT Running it before this patch: $ ./test.sh (...) wrote 134217728/134217728 bytes at offset 0 128 MiB, 128 ops; 0.1152 sec (1.085 GiB/sec and 1110.5809 ops/sec) Create a snapshot of '/mnt/sdj' in '/mnt/sdj/snap' fiemap after snapshot, range [120M, 120M + 128K): /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [245760..246015]: 34304..34559 256 0x2008 Overwriting file range [120M, 120M + 128K)... 
wrote 131072/131072 bytes at offset 125829120 128 KiB, 1 ops; 0.0001 sec (683.060 MiB/sec and 5464.4809 ops/sec) fiemap after overwrite range [120M, 120M + 128K): /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [245760..246015]: 34304..34559 256 0x2008 The extent in the range [120M, 120M + 128K) is still reported as shared (0x2000 bit set) after overwriting that range and flushing delalloc, which is not correct - an entire path was COWed in the snapshot's tree and the extent is now only referenced by the original fs tree. Running it after this patch: $ ./test.sh (...) wrote 134217728/134217728 bytes at offset 0 128 MiB, 128 ops; 0.1198 sec (1.043 GiB/sec and 1068.2067 ops/sec) Create a snapshot of '/mnt/sdj' in '/mnt/sdj/snap' fiemap after snapshot, range [120M, 120M + 128K): /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [245760..246015]: 34304..34559 256 0x2008 Overwriting file range [120M, 120M + 128K)... wrote 131072/131072 bytes at offset 125829120 128 KiB, 1 ops; 0.0001 sec (694.444 MiB/sec and 5555.5556 ops/sec) fiemap after overwrite range [120M, 120M + 128K): /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [245760..246015]: 34304..34559 256 0x8 Now the extent is not reported as shared anymore. So fix this by passing a NULL key pointer to add_indirect_ref() when processing a delayed reference for a tree block if there's no extent op for our delayed ref head with a defined key. Also access the extent op only after locking the delayed ref head's lock. The reproducer will be converted later to a test case for fstests. 
Fixes: 86d5f994425252 ("btrfs: convert prelimary reference tracking to use rbtrees") Fixes: a6dbceafb915e8 ("btrfs: Remove unused op_key var from add_delayed_refs") Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/backref.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7e8fac12f3f8..92cb16c0e5ee 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -818,16 +818,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, struct preftrees *preftrees, struct share_check *sc) { struct btrfs_delayed_ref_node *node; - struct btrfs_delayed_extent_op *extent_op = head->extent_op; struct btrfs_key key; - struct btrfs_key tmp_op_key; struct rb_node *n; int count; int ret = 0; - if (extent_op && extent_op->update_key) - btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key); - spin_lock(&head->lock); for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) { node = rb_entry(n, struct btrfs_delayed_ref_node, @@ -853,10 +848,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, case BTRFS_TREE_BLOCK_REF_KEY: { /* NORMAL INDIRECT METADATA backref */ struct btrfs_delayed_tree_ref *ref; + struct btrfs_key *key_ptr = NULL; + + if (head->extent_op && head->extent_op->update_key) { + btrfs_disk_key_to_cpu(&key, &head->extent_op->key); + key_ptr = &key; + } ref = btrfs_delayed_node_to_tree_ref(node); ret = add_indirect_ref(fs_info, preftrees, ref->root, - &tmp_op_key, ref->level + 1, + key_ptr, ref->level + 1, node->bytenr, count, sc, GFP_ATOMIC); break; From fc8c6b8bb294b7c1b2853808648e7a2cec392b61 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 10 Oct 2022 13:34:23 -0700 Subject: [PATCH 0417/1477] ACPI: extlog: Handle multiple records [ Upstream commit f6ec01da40e4139b41179f046044ee7c4f6370dc ] If there is no user space consumer of extlog_mem trace records, then Linux properly handles multiple error records in an ELOG 
block extlog_print() print_extlog_rcd() __print_extlog_rcd() cper_estatus_print() apei_estatus_for_each_section() But the other code path hard codes looking for a single record to output a trace record. Fix by using the same apei_estatus_for_each_section() iterator to step over all records. Fixes: 2dfb7d51a61d ("trace, RAS: Add eMCA trace event interface") Signed-off-by: Tony Luck Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpi_extlog.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 72f1fb77abcd..e648158368a7 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -138,8 +139,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, int cpu = mce->extcpu; struct acpi_hest_generic_status *estatus, *tmp; struct acpi_hest_generic_data *gdata; - const guid_t *fru_id = &guid_null; - char *fru_text = ""; + const guid_t *fru_id; + char *fru_text; guid_t *sec_type; static u32 err_seq; @@ -160,17 +161,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, /* log event via trace */ err_seq++; - gdata = (struct acpi_hest_generic_data *)(tmp + 1); - if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) - fru_id = (guid_t *)gdata->fru_id; - if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) - fru_text = gdata->fru_text; - sec_type = (guid_t *)gdata->section_type; - if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { - struct cper_sec_mem_err *mem = (void *)(gdata + 1); - if (gdata->error_data_length >= sizeof(*mem)) - trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, - (u8)gdata->error_severity); + apei_estatus_for_each_section(tmp, gdata) { + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) + fru_id = (guid_t *)gdata->fru_id; + else + fru_id = &guid_null; + if (gdata->validation_bits & 
CPER_SEC_VALID_FRU_TEXT) + fru_text = gdata->fru_text; + else + fru_text = ""; + sec_type = (guid_t *)gdata->section_type; + if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { + struct cper_sec_mem_err *mem = (void *)(gdata + 1); + + if (gdata->error_data_length >= sizeof(*mem)) + trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, + (u8)gdata->error_severity); + } } out: From 1f4ed95ce617fbb4f2e3c3bf54fc3f8c72092fe6 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Mon, 10 Oct 2022 15:46:13 +1300 Subject: [PATCH 0418/1477] tipc: Fix recognition of trial period [ Upstream commit 28be7ca4fcfd69a2d52aaa331adbf9dbe91f9e6e ] The trial period exists until jiffies is after addr_trial_end. But as jiffies will eventually overflow, just using time_after will eventually give incorrect results. As the node address is set once the trial period ends, this can be used to know that we are not in the trial period. Fixes: e415577f57f4 ("tipc: correct discovery message handling during address trial period") Signed-off-by: Mark Tomlinson Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/tipc/discover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 14bc20604051..2ae268b67465 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -147,8 +147,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, { struct net *net = d->net; struct tipc_net *tn = tipc_net(net); - bool trial = time_before(jiffies, tn->addr_trial_end); u32 self = tipc_own_addr(net); + bool trial = time_before(jiffies, tn->addr_trial_end) && !self; if (mtyp == DSC_TRIAL_FAIL_MSG) { if (!trial) From e558e148938442dd49628cd7ef61c360832bef31 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 12 Oct 2022 17:25:14 +0200 Subject: [PATCH 0419/1477] tipc: fix an information leak in tipc_topsrv_kern_subscr [ Upstream commit 777ecaabd614d47c482a5c9031579e66da13989a ] Use a 8-byte write to initialize sub.usr_handle in tipc_topsrv_kern_subscr(), otherwise four bytes remain uninitialized when issuing setsockopt(..., SOL_TIPC, ...). This resulted in an infoleak reported by KMSAN when the packet was received: ===================================================== BUG: KMSAN: kernel-infoleak in copyout+0xbc/0x100 lib/iov_iter.c:169 instrument_copy_to_user ./include/linux/instrumented.h:121 copyout+0xbc/0x100 lib/iov_iter.c:169 _copy_to_iter+0x5c0/0x20a0 lib/iov_iter.c:527 copy_to_iter ./include/linux/uio.h:176 simple_copy_to_iter+0x64/0xa0 net/core/datagram.c:513 __skb_datagram_iter+0x123/0xdc0 net/core/datagram.c:419 skb_copy_datagram_iter+0x58/0x200 net/core/datagram.c:527 skb_copy_datagram_msg ./include/linux/skbuff.h:3903 packet_recvmsg+0x521/0x1e70 net/packet/af_packet.c:3469 ____sys_recvmsg+0x2c4/0x810 net/socket.c:? 
___sys_recvmsg+0x217/0x840 net/socket.c:2743 __sys_recvmsg net/socket.c:2773 __do_sys_recvmsg net/socket.c:2783 __se_sys_recvmsg net/socket.c:2780 __x64_sys_recvmsg+0x364/0x540 net/socket.c:2780 do_syscall_x64 arch/x86/entry/common.c:50 do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd arch/x86/entry/entry_64.S:120 ... Uninit was stored to memory at: tipc_sub_subscribe+0x42d/0xb50 net/tipc/subscr.c:156 tipc_conn_rcv_sub+0x246/0x620 net/tipc/topsrv.c:375 tipc_topsrv_kern_subscr+0x2e8/0x400 net/tipc/topsrv.c:579 tipc_group_create+0x4e7/0x7d0 net/tipc/group.c:190 tipc_sk_join+0x2a8/0x770 net/tipc/socket.c:3084 tipc_setsockopt+0xae5/0xe40 net/tipc/socket.c:3201 __sys_setsockopt+0x87f/0xdc0 net/socket.c:2252 __do_sys_setsockopt net/socket.c:2263 __se_sys_setsockopt net/socket.c:2260 __x64_sys_setsockopt+0xe0/0x160 net/socket.c:2260 do_syscall_x64 arch/x86/entry/common.c:50 do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd arch/x86/entry/entry_64.S:120 Local variable sub created at: tipc_topsrv_kern_subscr+0x57/0x400 net/tipc/topsrv.c:562 tipc_group_create+0x4e7/0x7d0 net/tipc/group.c:190 Bytes 84-87 of 88 are uninitialized Memory access of size 88 starts at ffff88801ed57cd0 Data copied to user address 0000000020000400 ... ===================================================== Signed-off-by: Alexander Potapenko Fixes: 026321c6d056a5 ("tipc: rename tipc_server to tipc_topsrv") Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/tipc/topsrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 13f3143609f9..d9e2c0fea3f2 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -568,7 +568,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, sub.seq.upper = upper; sub.timeout = TIPC_WAIT_FOREVER; sub.filter = filter; - *(u32 *)&sub.usr_handle = port; + *(u64 *)&sub.usr_handle = (u64)port; con = tipc_conn_alloc(tipc_topsrv(net)); if (IS_ERR(con)) From ed5baf3d0a33caaca4cd4073ebb0854cc77a616d Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Wed, 12 Oct 2022 13:54:40 -0700 Subject: [PATCH 0420/1477] i40e: Fix DMA mappings leak [ Upstream commit aae425efdfd1b1d8452260a3cb49344ebf20b1f5 ] During reallocation of RX buffers, new DMA mappings are created for those buffers. steps for reproduction: while : do for ((i=0; i<=8160; i=i+32)) do ethtool -G enp130s0f0 rx $i tx $i sleep 0.5 ethtool -g enp130s0f0 done done This resulted in crash: i40e 0000:01:00.1: Unable to allocate memory for the Rx descriptor ring, size=65536 Driver BUG WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:141 xdp_rxq_info_unreg+0x43/0x50 Call Trace: i40e_free_rx_resources+0x70/0x80 [i40e] i40e_set_ringparam+0x27c/0x800 [i40e] ethnl_set_rings+0x1b2/0x290 genl_family_rcv_msg_doit.isra.15+0x10f/0x150 genl_family_rcv_msg+0xb3/0x160 ? rings_fill_reply+0x1a0/0x1a0 genl_rcv_msg+0x47/0x90 ? genl_family_rcv_msg+0x160/0x160 netlink_rcv_skb+0x4c/0x120 genl_rcv+0x24/0x40 netlink_unicast+0x196/0x230 netlink_sendmsg+0x204/0x3d0 sock_sendmsg+0x4c/0x50 __sys_sendto+0xee/0x160 ? handle_mm_fault+0xbe/0x1e0 ? 
syscall_trace_enter+0x1d3/0x2c0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x1a0 entry_SYSCALL_64_after_hwframe+0x65/0xca RIP: 0033:0x7f5eac8b035b Missing register, driver bug WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:119 xdp_rxq_info_unreg_mem_model+0x69/0x140 Call Trace: xdp_rxq_info_unreg+0x1e/0x50 i40e_free_rx_resources+0x70/0x80 [i40e] i40e_set_ringparam+0x27c/0x800 [i40e] ethnl_set_rings+0x1b2/0x290 genl_family_rcv_msg_doit.isra.15+0x10f/0x150 genl_family_rcv_msg+0xb3/0x160 ? rings_fill_reply+0x1a0/0x1a0 genl_rcv_msg+0x47/0x90 ? genl_family_rcv_msg+0x160/0x160 netlink_rcv_skb+0x4c/0x120 genl_rcv+0x24/0x40 netlink_unicast+0x196/0x230 netlink_sendmsg+0x204/0x3d0 sock_sendmsg+0x4c/0x50 __sys_sendto+0xee/0x160 ? handle_mm_fault+0xbe/0x1e0 ? syscall_trace_enter+0x1d3/0x2c0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x1a0 entry_SYSCALL_64_after_hwframe+0x65/0xca RIP: 0033:0x7f5eac8b035b This was caused because of new buffers with different RX ring count should substitute older ones, but those buffers were freed in i40e_configure_rx_ring and reallocated again with i40e_alloc_rx_bi, thus kfree on rx_bi caused leak of already mapped DMA. Fix this by reallocating ZC with rx_bi_zc struct when BPF program loads. Additionally reallocate back to rx_bi when BPF program unloads. If BPF program is loaded/unloaded and XSK pools are created, reallocate RX queues accordingly in XSP_SETUP_XSK_POOL handler. Fixes: be1222b585fd ("i40e: Separate kernel allocated rx_bi rings from AF_XDP rings") Signed-off-by: Jan Sokolowski Signed-off-by: Mateusz Palczewski Signed-off-by: Jacob Keller Tested-by: Chandan (A Contingent Worker at Intel) Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 3 - drivers/net/ethernet/intel/i40e/i40e_main.c | 16 +++-- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 13 ++-- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 - drivers/net/ethernet/intel/i40e/i40e_xsk.c | 67 ++++++++++++++++--- drivers/net/ethernet/intel/i40e/i40e_xsk.h | 2 +- 6 files changed, 74 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 63054061966e..cc5f5c237774 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2081,9 +2081,6 @@ static int i40e_set_ringparam(struct net_device *netdev, */ rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS; err = i40e_setup_rx_descriptors(&rx_rings[i]); - if (err) - goto rx_unwind; - err = i40e_alloc_rx_bi(&rx_rings[i]); if (err) goto rx_unwind; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c7f243ddbcf7..ea6a984c6d12 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3409,12 +3409,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) if (ring->vsi->type == I40E_VSI_MAIN) xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); - kfree(ring->rx_bi); ring->xsk_pool = i40e_xsk_pool(ring); if (ring->xsk_pool) { - ret = i40e_alloc_rx_bi_zc(ring); - if (ret) - return ret; ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); /* For AF_XDP ZC, we disallow packets to span on @@ -3432,9 +3428,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) ring->queue_index); } else { - ret = i40e_alloc_rx_bi(ring); - if (ret) - return ret; ring->rx_buf_len = vsi->rx_buf_len; if (ring->vsi->type == I40E_VSI_MAIN) { ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, @@ -12684,6 +12677,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, i40e_reset_and_rebuild(pf, 
true, true); } + if (!i40e_enabled_xdp_vsi(vsi) && prog) { + if (i40e_realloc_rx_bi_zc(vsi, true)) + return -ENOMEM; + } else if (i40e_enabled_xdp_vsi(vsi) && !prog) { + if (i40e_realloc_rx_bi_zc(vsi, false)) + return -ENOMEM; + } + for (i = 0; i < vsi->num_queue_pairs; i++) WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); @@ -12916,6 +12917,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) i40e_queue_pair_disable_irq(vsi, queue_pair); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); + i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 5ad28129fab2..43be33d87e39 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1305,14 +1305,6 @@ err: return -ENOMEM; } -int i40e_alloc_rx_bi(struct i40e_ring *rx_ring) -{ - unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count; - - rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL); - return rx_ring->rx_bi ? 
0 : -ENOMEM; -} - static void i40e_clear_rx_bi(struct i40e_ring *rx_ring) { memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count); @@ -1443,6 +1435,11 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) rx_ring->xdp_prog = rx_ring->vsi->xdp_prog; + rx_ring->rx_bi = + kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL); + if (!rx_ring->rx_bi) + return -ENOMEM; + return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 93ac201f68b8..af843e8169f7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -465,7 +465,6 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40e_chk_linearize(struct sk_buff *skb); int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); -int i40e_alloc_rx_bi(struct i40e_ring *rx_ring); /** * i40e_get_head - Retrieve head from head writeback diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 75e4a698c3db..7f1226123629 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -9,14 +9,6 @@ #include "i40e_txrx_common.h" #include "i40e_xsk.h" -int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring) -{ - unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count; - - rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL); - return rx_ring->rx_bi_zc ? 0 : -ENOMEM; -} - void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring) { memset(rx_ring->rx_bi_zc, 0, @@ -28,6 +20,58 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx) return &rx_ring->rx_bi_zc[idx]; } +/** + * i40e_realloc_rx_xdp_bi - reallocate SW ring for either XSK or normal buffer + * @rx_ring: Current rx ring + * @pool_present: is pool for XSK present + * + * Try allocating memory and return ENOMEM, if failed to allocate. 
+ * If allocation was successful, substitute buffer with allocated one. + * Returns 0 on success, negative on failure + */ +static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present) +{ + size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) : + sizeof(*rx_ring->rx_bi); + void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL); + + if (!sw_ring) + return -ENOMEM; + + if (pool_present) { + kfree(rx_ring->rx_bi); + rx_ring->rx_bi = NULL; + rx_ring->rx_bi_zc = sw_ring; + } else { + kfree(rx_ring->rx_bi_zc); + rx_ring->rx_bi_zc = NULL; + rx_ring->rx_bi = sw_ring; + } + return 0; +} + +/** + * i40e_realloc_rx_bi_zc - reallocate rx SW rings + * @vsi: Current VSI + * @zc: is zero copy set + * + * Reallocate buffer for rx_rings that might be used by XSK. + * XDP requires more memory, than rx_buf provides. + * Returns 0 on success, negative on failure + */ +int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc) +{ + struct i40e_ring *rx_ring; + unsigned long q; + + for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) { + rx_ring = vsi->rx_rings[q]; + if (i40e_realloc_rx_xdp_bi(rx_ring, zc)) + return -ENOMEM; + } + return 0; +} + /** * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a * certain ring/qid @@ -68,6 +112,10 @@ static int i40e_xsk_pool_enable(struct i40e_vsi *vsi, if (err) return err; + err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true); + if (err) + return err; + err = i40e_queue_pair_enable(vsi, qid); if (err) return err; @@ -112,6 +160,9 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid) xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR); if (if_running) { + err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false); + if (err) + return err; err = i40e_queue_pair_enable(vsi, qid); if (err) return err; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h index 7adfd8539247..36f5b6d20601 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h 
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h @@ -17,7 +17,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget); bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring); int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); -int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring); +int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc); void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring); #endif /* _I40E_XSK_H_ */ From 39d10f0dfb7201cbd722706e0c428e4f7d89281f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Sun, 9 Oct 2022 20:27:47 +0200 Subject: [PATCH 0421/1477] HID: magicmouse: Do not set BTN_MOUSE on double report MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bb5f0c855dcfc893ae5ed90e4c646bde9e4498bf ] Under certain conditions the Magic Trackpad can group 2 reports in a single packet. The packet is split and the raw event function is invoked recursively for each part. However, after processing each part, the BTN_MOUSE status is updated, sending multiple click events. [1] Return after processing double reports to avoid this issue. 
Link: https://gitlab.freedesktop.org/libinput/libinput/-/issues/811 # [1] Fixes: a462230e16ac ("HID: magicmouse: enable Magic Trackpad support") Reported-by: Nulo Signed-off-by: José Expósito Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20221009182747.90730-1-jose.exposito89@gmail.com Signed-off-by: Sasha Levin --- drivers/hid/hid-magicmouse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index fc4c07459753..28158d2f2352 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -387,7 +387,7 @@ static int magicmouse_raw_event(struct hid_device *hdev, magicmouse_raw_event(hdev, report, data + 2, data[1]); magicmouse_raw_event(hdev, report, data + 2 + data[1], size - 2 - data[1]); - break; + return 0; default: return 0; } From c8310a99e7e4ad658aa5160fe817533d0d7069fe Mon Sep 17 00:00:00 2001 From: Jonathan Cooper Date: Thu, 13 Oct 2022 10:55:53 +0100 Subject: [PATCH 0422/1477] sfc: Change VF mac via PF as first preference if available. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a8aed7b35becfd21f22a77c7014029ea837b018f ] Changing a VF's mac address through the VF (rather than via the PF) fails with EPERM because the latter part of efx_ef10_set_mac_address attempts to change the vport mac address list as the VF. Even with this fixed it still fails with EBUSY because the vadaptor is still assigned on the VF - the vadaptor reassignment must be within a section where the VF has torn down its state. A major reason this has broken is because we have two functions that ostensibly do the same thing - have a PF and VF cooperate to change a VF mac address. Rather than do this, if we are changing the mac of a VF that has a link to the PF in the same VM then simply call sriov_set_vf_mac instead, which is a proven working function that does that. 
If there is no PF available, or that fails non-fatally, then attempt to change the VF's mac address as we would a PF, without updating the PF's data. Test case: Create a VF: echo 1 > /sys/class/net/<pf>/device/sriov_numvfs Set the mac address of the VF directly: ip link set <vf> addr 00:11:22:33:44:55 Set the MAC address of the VF via the PF: ip link set <pf> vf 0 mac 00:11:22:33:44:66 Without this patch the last command will fail with ENOENT. Signed-off-by: Jonathan Cooper Reported-by: Íñigo Huguet Fixes: 910c8789a777 ("set the MAC address using MC_CMD_VADAPTOR_SET_MAC") Acked-by: Edward Cree Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/ef10.c | 58 ++++++++++++++------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 5b7413305be6..eb1be7302082 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3255,6 +3255,30 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) bool was_enabled = efx->port_enabled; int rc; +#ifdef CONFIG_SFC_SRIOV + /* If this function is a VF and we have access to the parent PF, + * then use the PF control path to attempt to change the VF MAC address. + */ + if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) { + struct efx_nic *efx_pf = pci_get_drvdata(efx->pci_dev->physfn); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u8 mac[ETH_ALEN]; + + /* net_dev->dev_addr can be zeroed by efx_net_stop in + * efx_ef10_sriov_set_vf_mac, so pass in a copy.
+ */ + ether_addr_copy(mac, efx->net_dev->dev_addr); + + rc = efx_ef10_sriov_set_vf_mac(efx_pf, nic_data->vf_index, mac); + if (!rc) + return 0; + + netif_dbg(efx, drv, efx->net_dev, + "Updating VF mac via PF failed (%d), setting directly\n", + rc); + } +#endif + efx_device_detach_sync(efx); efx_net_stop(efx->net_dev); @@ -3277,40 +3301,6 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) efx_net_open(efx->net_dev); efx_device_attach_if_not_resetting(efx); -#ifdef CONFIG_SFC_SRIOV - if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) { - struct efx_ef10_nic_data *nic_data = efx->nic_data; - struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; - - if (rc == -EPERM) { - struct efx_nic *efx_pf; - - /* Switch to PF and change MAC address on vport */ - efx_pf = pci_get_drvdata(pci_dev_pf); - - rc = efx_ef10_sriov_set_vf_mac(efx_pf, - nic_data->vf_index, - efx->net_dev->dev_addr); - } else if (!rc) { - struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf); - struct efx_ef10_nic_data *nic_data = efx_pf->nic_data; - unsigned int i; - - /* MAC address successfully changed by VF (with MAC - * spoofing) so update the parent PF if possible. - */ - for (i = 0; i < efx_pf->vf_count; ++i) { - struct ef10_vf *vf = nic_data->vf + i; - - if (vf->efx == efx) { - ether_addr_copy(vf->mac, - efx->net_dev->dev_addr); - return 0; - } - } - } - } else -#endif if (rc == -EPERM) { netif_err(efx, drv, efx->net_dev, "Cannot change MAC address; use sfboot to enable" From 118f412bedc5843032d65f41eb4e635a830c2fa8 Mon Sep 17 00:00:00 2001 From: Xiaobo Liu Date: Fri, 14 Oct 2022 10:05:40 +0800 Subject: [PATCH 0423/1477] net/atm: fix proc_mpc_write incorrect return value [ Upstream commit d8bde3bf7f82dac5fc68a62c2816793a12cafa2a ] Then the input contains '\0' or '\n', proc_mpc_write has read them, so the return value needs +1. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xiaobo Liu Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/atm/mpoa_proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 829db9eba0cb..aaf64b953915 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -219,11 +219,12 @@ static ssize_t proc_mpc_write(struct file *file, const char __user *buff, if (!page) return -ENOMEM; - for (p = page, len = 0; len < nbytes; p++, len++) { + for (p = page, len = 0; len < nbytes; p++) { if (get_user(*p, buff++)) { free_page((unsigned long)page); return -EFAULT; } + len += 1; if (*p == '\0' || *p == '\n') break; } From 05cc22c0085e18627df110facd731359b83be7ef Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Fri, 14 Oct 2022 12:17:35 +0530 Subject: [PATCH 0424/1477] net: phy: dp83867: Extend RX strap quirk for SGMII mode [ Upstream commit 0c9efbd5c50c64ead434960a404c9c9a097b0403 ] When RX strap in HW is not set to MODE 3 or 4, bit 7 and 8 in CF4 register should be set. The former is already handled in dp83867_config_init; add the latter in SGMII specific initialization. Fixes: 2a10154abcb7 ("net: phy: dp83867: Add TI dp83867 phy") Signed-off-by: Harini Katakam Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/dp83867.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index f86acad0aad4..c8031e297faf 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -757,6 +757,14 @@ static int dp83867_config_init(struct phy_device *phydev) else val &= ~DP83867_SGMII_TYPE; phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_SGMIICTL, val); + + /* This is a SW workaround for link instability if RX_CTRL is + * not strapped to mode 3 or 4 in HW. This is required for SGMII + * in addition to clearing bit 7, handled above. 
+ */ + if (dp83867->rxctrl_strap_quirk) + phy_set_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, + BIT(8)); } val = phy_read(phydev, DP83867_CFG3); From bf49d4fe4ab7b8d812927a2c7b514864d5fc1bb2 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Mon, 17 Oct 2022 22:45:22 +0800 Subject: [PATCH 0425/1477] cifs: Fix xid leak in cifs_copy_file_range() [ Upstream commit 9a97df404a402fe1174d2d1119f87ff2a0ca2fe9 ] If the file is used by swap, before return -EOPNOTSUPP, should free the xid, otherwise, the xid will be leaked. Fixes: 4e8aea30f775 ("smb3: enable swap on SMB3 mounts") Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Zhang Xiaoxu Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/cifsfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index bc957e6ca48b..f442ef8b65da 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1221,8 +1221,11 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, ssize_t rc; struct cifsFileInfo *cfile = dst_file->private_data; - if (cfile->swapfile) - return -EOPNOTSUPP; + if (cfile->swapfile) { + rc = -EOPNOTSUPP; + free_xid(xid); + return rc; + } rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, len, flags); From 2d08311aa3056a84bfdc09bc47777de6a9b16c30 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Mon, 17 Oct 2022 22:45:23 +0800 Subject: [PATCH 0426/1477] cifs: Fix xid leak in cifs_flock() [ Upstream commit 575e079c782b9862ec2626403922d041a42e6ed6 ] If not flock, before return -ENOLCK, should free the xid, otherwise, the xid will be leaked. 
Fixes: d0677992d2af ("cifs: add support for flock") Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Zhang Xiaoxu Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/file.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a648146e49cf..144064dc0d38 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1735,11 +1735,13 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl) struct cifsFileInfo *cfile; __u32 type; - rc = -EACCES; xid = get_xid(); - if (!(fl->fl_flags & FL_FLOCK)) - return -ENOLCK; + if (!(fl->fl_flags & FL_FLOCK)) { + rc = -ENOLCK; + free_xid(xid); + return rc; + } cfile = (struct cifsFileInfo *)file->private_data; tcon = tlink_tcon(cfile->tlink); @@ -1758,8 +1760,9 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl) * if no lock or unlock then nothing to do since we do not * know what it is */ + rc = -EOPNOTSUPP; free_xid(xid); - return -EOPNOTSUPP; + return rc; } rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, From 7286f875510486fdc2fc426b7c826262e2283a65 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Mon, 17 Oct 2022 22:45:24 +0800 Subject: [PATCH 0427/1477] cifs: Fix xid leak in cifs_ses_add_channel() [ Upstream commit e909d054bdea75ef1ec48c18c5936affdaecbb2c ] Before return, should free the xid, otherwise, the xid will be leaked. 
Fixes: d70e9fa55884 ("cifs: try opening channels after mounting") Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Zhang Xiaoxu Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/sess.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index d58c5ffeca0d..cf6fd138d8d5 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -306,6 +306,7 @@ out: cifs_put_tcp_session(chan->server, 0); unload_nls(vol.local_nls); + free_xid(xid); return rc; } From ff7ba766758313129794f150bbc4d351b5e17a53 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Oct 2022 16:59:28 +0000 Subject: [PATCH 0428/1477] net: hsr: avoid possible NULL deref in skb_clone() [ Upstream commit d8b57135fd9ffe9a5b445350a686442a531c5339 ] syzbot got a crash [1] in skb_clone(), caused by a bug in hsr_get_untagged_frame(). When/if create_stripped_skb_hsr() returns NULL, we must not attempt to call skb_clone(). While we are at it, replace a WARN_ONCE() by netdev_warn_once(). [1] general protection fault, probably for non-canonical address 0xdffffc000000000f: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000078-0x000000000000007f] CPU: 1 PID: 754 Comm: syz-executor.0 Not tainted 6.0.0-syzkaller-02734-g0326074ff465 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 RIP: 0010:skb_clone+0x108/0x3c0 net/core/skbuff.c:1641 Code: 93 02 00 00 49 83 7c 24 28 00 0f 85 e9 00 00 00 e8 5d 4a 29 fa 4c 8d 75 7e 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 ea 03 <0f> b6 04 02 4c 89 f2 83 e2 07 38 d0 7f 08 84 c0 0f 85 9e 01 00 00 RSP: 0018:ffffc90003ccf4e0 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffffc90003ccf5f8 RCX: ffffc9000c24b000 RDX: 000000000000000f RSI: ffffffff8751cb13 RDI: 0000000000000000 RBP: 0000000000000000 R08: 00000000000000f0 R09: 0000000000000140 R10: fffffbfff181d972 R11: 0000000000000000 R12: ffff888161fc3640 R13: 0000000000000a20 R14: 000000000000007e R15: ffffffff8dc5f620 FS: 
00007feb621e4700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007feb621e3ff8 CR3: 00000001643a9000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: hsr_get_untagged_frame+0x4e/0x610 net/hsr/hsr_forward.c:164 hsr_forward_do net/hsr/hsr_forward.c:461 [inline] hsr_forward_skb+0xcca/0x1d50 net/hsr/hsr_forward.c:623 hsr_handle_frame+0x588/0x7c0 net/hsr/hsr_slave.c:69 __netif_receive_skb_core+0x9fe/0x38f0 net/core/dev.c:5379 __netif_receive_skb_one_core+0xae/0x180 net/core/dev.c:5483 __netif_receive_skb+0x1f/0x1c0 net/core/dev.c:5599 netif_receive_skb_internal net/core/dev.c:5685 [inline] netif_receive_skb+0x12f/0x8d0 net/core/dev.c:5744 tun_rx_batched+0x4ab/0x7a0 drivers/net/tun.c:1544 tun_get_user+0x2686/0x3a00 drivers/net/tun.c:1995 tun_chr_write_iter+0xdb/0x200 drivers/net/tun.c:2025 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9e9/0xdd0 fs/read_write.c:584 ksys_write+0x127/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: f266a683a480 ("net/hsr: Better frame dispatch") Reported-by: syzbot Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20221017165928.2150130-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/hsr/hsr_forward.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index baf4765be6d7..908324b46328 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -108,15 +108,15 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, struct hsr_port *port) { if (!frame->skb_std) { - if (frame->skb_hsr) { + if (frame->skb_hsr) frame->skb_std = 
create_stripped_skb_hsr(frame->skb_hsr, frame); - } else { - /* Unexpected */ - WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n", - __FILE__, __LINE__, port->dev->name); + else + netdev_warn_once(port->dev, + "Unexpected frame received in hsr_get_untagged_frame()\n"); + + if (!frame->skb_std) return NULL; - } } return skb_clone(frame->skb_std, GFP_ATOMIC); From 191d71c6357ef6e64b07e37b1e28c44480526d5a Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 17 Oct 2022 16:31:23 -0700 Subject: [PATCH 0429/1477] ionic: catch NULL pointer issue on reconfig [ Upstream commit aa1d7e1267c12e07d979aa34c613716a89029db2 ] It's possible that the driver will dereference a qcq that doesn't exist when calling ionic_reconfigure_queues(), which causes a page fault BUG. If a reduction in the number of queues is followed by a different reconfig such as changing the ring size, the driver can hit a NULL pointer when trying to clean up non-existent queues. Fix this by checking to make sure both the qcqs array and qcq entry exist before trying to use and free the entry.
Fixes: 101b40a0171f ("ionic: change queue count with no reset") Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20221017233123.15869-1-snelson@pensando.io Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index e42520f909fe..cb12d0171517 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2383,11 +2383,15 @@ err_out: * than the full array, but leave the qcq shells in place */ for (i = lif->nxqs; i < lif->ionic->ntxqs_per_lif; i++) { - lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; - ionic_qcq_free(lif, lif->txqcqs[i]); + if (lif->txqcqs && lif->txqcqs[i]) { + lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; + ionic_qcq_free(lif, lif->txqcqs[i]); + } - lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; - ionic_qcq_free(lif, lif->rxqcqs[i]); + if (lif->rxqcqs && lif->rxqcqs[i]) { + lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; + ionic_qcq_free(lif, lif->rxqcqs[i]); + } } return err; From bc17f727b005921fc3b8651daefa053200c57cf0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 19 Jan 2021 07:43:18 +0100 Subject: [PATCH 0430/1477] nvme-hwmon: rework to avoid devm allocation [ Upstream commit ed7770f6628691c13c9423bce7eee7cff2399c12 ] The original design to use device-managed resource allocation doesn't really work as the NVMe controller has a vastly different lifetime than the hwmon sysfs attributes, causing warning about duplicate sysfs entries upon reconnection. This patch reworks the hwmon allocation to avoid device-managed resource allocation, and uses the NVMe controller as parent for the sysfs attributes. 
Cc: Guenter Roeck Signed-off-by: Hannes Reinecke Tested-by: Enzo Matsumiya Tested-by: Daniel Wagner Signed-off-by: Christoph Hellwig Stable-dep-of: c94b7f9bab22 ("nvme-hwmon: kmalloc the NVME SMART log buffer") Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/hwmon.c | 31 +++++++++++++++++++++---------- drivers/nvme/host/nvme.h | 8 ++++++++ 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e9c13804760e..51e5c12988fe 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4485,6 +4485,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) { + nvme_hwmon_exit(ctrl); nvme_fault_inject_fini(&ctrl->fault_inject); dev_pm_qos_hide_latency_tolerance(ctrl->device); cdev_device_del(&ctrl->cdev, ctrl->device); diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c index 552dbc04567b..8f9e96986780 100644 --- a/drivers/nvme/host/hwmon.c +++ b/drivers/nvme/host/hwmon.c @@ -223,12 +223,12 @@ static const struct hwmon_chip_info nvme_hwmon_chip_info = { int nvme_hwmon_init(struct nvme_ctrl *ctrl) { - struct device *dev = ctrl->dev; + struct device *dev = ctrl->device; struct nvme_hwmon_data *data; struct device *hwmon; int err; - data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return 0; @@ -237,19 +237,30 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) err = nvme_hwmon_get_smart_log(data); if (err) { - dev_warn(ctrl->device, - "Failed to read smart log (error %d)\n", err); - devm_kfree(dev, data); + dev_warn(dev, "Failed to read smart log (error %d)\n", err); + kfree(data); return err; } - hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data, - &nvme_hwmon_chip_info, - NULL); + hwmon = hwmon_device_register_with_info(dev, "nvme", + data, &nvme_hwmon_chip_info, + NULL); if (IS_ERR(hwmon)) { dev_warn(dev, "Failed to instantiate hwmon device\n"); - 
devm_kfree(dev, data); + kfree(data); } - + ctrl->hwmon_device = hwmon; return 0; } + +void nvme_hwmon_exit(struct nvme_ctrl *ctrl) +{ + if (ctrl->hwmon_device) { + struct nvme_hwmon_data *data = + dev_get_drvdata(ctrl->hwmon_device); + + hwmon_device_unregister(ctrl->hwmon_device); + ctrl->hwmon_device = NULL; + kfree(data); + } +} diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 58cf9e39d613..abae7ef2ac51 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -257,6 +257,9 @@ struct nvme_ctrl { struct rw_semaphore namespaces_rwsem; struct device ctrl_device; struct device *device; /* char device */ +#ifdef CONFIG_NVME_HWMON + struct device *hwmon_device; +#endif struct cdev cdev; struct work_struct reset_work; struct work_struct delete_work; @@ -876,11 +879,16 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) #ifdef CONFIG_NVME_HWMON int nvme_hwmon_init(struct nvme_ctrl *ctrl); +void nvme_hwmon_exit(struct nvme_ctrl *ctrl); #else static inline int nvme_hwmon_init(struct nvme_ctrl *ctrl) { return 0; } + +static inline void nvme_hwmon_exit(struct nvme_ctrl *ctrl) +{ +} #endif u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, From 67106ac27243313beb58097449555f92ea5947ec Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 12 Feb 2021 10:30:15 +0100 Subject: [PATCH 0431/1477] nvme-hwmon: Return error code when registration fails [ Upstream commit 78570f8873c8cd44c12714c7fa7db2601ec5617d ] The hwmon pointer wont be NULL if the registration fails. Though the exit code path will assign it to ctrl->hwmon_device. Later nvme_hwmon_exit() will try to free the invalid pointer. Avoid this by returning the error code from hwmon_device_register_with_info(). 
Fixes: ed7770f66286 ("nvme/hwmon: rework to avoid devm allocation") Signed-off-by: Daniel Wagner Signed-off-by: Christoph Hellwig Stable-dep-of: c94b7f9bab22 ("nvme-hwmon: kmalloc the NVME SMART log buffer") Signed-off-by: Sasha Levin --- drivers/nvme/host/hwmon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c index 8f9e96986780..0a586d712920 100644 --- a/drivers/nvme/host/hwmon.c +++ b/drivers/nvme/host/hwmon.c @@ -248,6 +248,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) if (IS_ERR(hwmon)) { dev_warn(dev, "Failed to instantiate hwmon device\n"); kfree(data); + return PTR_ERR(hwmon); } ctrl->hwmon_device = hwmon; return 0; From 770b7e3a2c1f32e82c5c9143d0a3670e9ea95a5f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Oct 2022 16:55:55 +0200 Subject: [PATCH 0432/1477] nvme-hwmon: consistently ignore errors from nvme_hwmon_init [ Upstream commit 6b8cf94005187952f794c0c4ed3920a1e8accfa3 ] An NVMe controller works perfectly fine even when the hwmon initialization fails. Stop returning errors that do not come from a controller reset from nvme_hwmon_init to handle this case consistently. Signed-off-by: Christoph Hellwig Reviewed-by: Guenter Roeck Reviewed-by: Serge Semin Stable-dep-of: c94b7f9bab22 ("nvme-hwmon: kmalloc the NVME SMART log buffer") Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 6 +++++- drivers/nvme/host/hwmon.c | 13 ++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 51e5c12988fe..3f106771d15b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3232,8 +3232,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) return ret; if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) { + /* + * Do not return errors unless we are in a controller reset, + * the controller works perfectly fine without hwmon. 
+ */ ret = nvme_hwmon_init(ctrl); - if (ret < 0) + if (ret == -EINTR) return ret; } diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c index 0a586d712920..23918bb7bdca 100644 --- a/drivers/nvme/host/hwmon.c +++ b/drivers/nvme/host/hwmon.c @@ -230,7 +230,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) - return 0; + return -ENOMEM; data->ctrl = ctrl; mutex_init(&data->read_lock); @@ -238,8 +238,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) err = nvme_hwmon_get_smart_log(data); if (err) { dev_warn(dev, "Failed to read smart log (error %d)\n", err); - kfree(data); - return err; + goto err_free_data; } hwmon = hwmon_device_register_with_info(dev, "nvme", @@ -247,11 +246,15 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) NULL); if (IS_ERR(hwmon)) { dev_warn(dev, "Failed to instantiate hwmon device\n"); - kfree(data); - return PTR_ERR(hwmon); + err = PTR_ERR(hwmon); + goto err_free_data; } ctrl->hwmon_device = hwmon; return 0; + +err_free_data: + kfree(data); + return err; } void nvme_hwmon_exit(struct nvme_ctrl *ctrl) From 2008ad08a2aee0aa8d6201c509bf6546f0df1183 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Tue, 18 Oct 2022 17:33:52 +0200 Subject: [PATCH 0433/1477] nvme-hwmon: kmalloc the NVME SMART log buffer [ Upstream commit c94b7f9bab22ac504f9153767676e659988575ad ] Recent commit 52fde2c07da6 ("nvme: set dma alignment to dword") has caused a regression on our platform. It turned out that the nvme_get_log() method invocation caused the nvme_hwmon_data structure instance corruption. In particular the nvme_hwmon_data.ctrl pointer was overwritten either with zeros or with garbage. After some research we discovered that the problem happened even before the actual NVME DMA execution, but during the buffer mapping. Since our platform is DMA-noncoherent, the mapping implied the cache-line invalidations or write-backs depending on the DMA-direction parameter. 
In the case of fetching the NVME SMART log, the DMA was performed from-device-to-memory, thus the cache-invalidation was activated during the buffer mapping. Since the log-buffer isn't cache-line aligned, the cache-invalidation caused the neighbouring data to be discarded. The neighbouring data turned out to be the data surrounding the buffer within the nvme_hwmon_data structure. In order to fix that we need to make sure that the whole log-buffer is defined within a cache-line-aligned memory region so the cache-invalidation procedure doesn't involve the adjacent data. One of the options to guarantee that is to kmalloc the DMA-buffer [1]. Since the rest of the NVME core driver prefers that method, it has been chosen to fix this problem too. Note that after deeper research we found out that the denoted commit wasn't the root cause of the problem. It just revealed the defect by activating the DMA-based NVME SMART log retrieval performed by the NVME hwmon driver. The problem has been there since the initial commit of the driver.
[1] Documentation/core-api/dma-api-howto.rst Fixes: 400b6a7b13a3 ("nvme: Add hardware monitoring support") Signed-off-by: Serge Semin Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/hwmon.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c index 23918bb7bdca..9e6e56c20ec9 100644 --- a/drivers/nvme/host/hwmon.c +++ b/drivers/nvme/host/hwmon.c @@ -12,7 +12,7 @@ struct nvme_hwmon_data { struct nvme_ctrl *ctrl; - struct nvme_smart_log log; + struct nvme_smart_log *log; struct mutex read_lock; }; @@ -60,14 +60,14 @@ static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under, static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data) { return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0, - NVME_CSI_NVM, &data->log, sizeof(data->log), 0); + NVME_CSI_NVM, data->log, sizeof(*data->log), 0); } static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { struct nvme_hwmon_data *data = dev_get_drvdata(dev); - struct nvme_smart_log *log = &data->log; + struct nvme_smart_log *log = data->log; int temp; int err; @@ -163,7 +163,7 @@ static umode_t nvme_hwmon_is_visible(const void *_data, case hwmon_temp_max: case hwmon_temp_min: if ((!channel && data->ctrl->wctemp) || - (channel && data->log.temp_sensor[channel - 1])) { + (channel && data->log->temp_sensor[channel - 1])) { if (data->ctrl->quirks & NVME_QUIRK_NO_TEMP_THRESH_CHANGE) return 0444; @@ -176,7 +176,7 @@ static umode_t nvme_hwmon_is_visible(const void *_data, break; case hwmon_temp_input: case hwmon_temp_label: - if (!channel || data->log.temp_sensor[channel - 1]) + if (!channel || data->log->temp_sensor[channel - 1]) return 0444; break; default: @@ -232,13 +232,19 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) if (!data) return -ENOMEM; + data->log = kzalloc(sizeof(*data->log), GFP_KERNEL); + if 
(!data->log) { + err = -ENOMEM; + goto err_free_data; + } + data->ctrl = ctrl; mutex_init(&data->read_lock); err = nvme_hwmon_get_smart_log(data); if (err) { dev_warn(dev, "Failed to read smart log (error %d)\n", err); - goto err_free_data; + goto err_free_log; } hwmon = hwmon_device_register_with_info(dev, "nvme", @@ -247,11 +253,13 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) if (IS_ERR(hwmon)) { dev_warn(dev, "Failed to instantiate hwmon device\n"); err = PTR_ERR(hwmon); - goto err_free_data; + goto err_free_log; } ctrl->hwmon_device = hwmon; return 0; +err_free_log: + kfree(data->log); err_free_data: kfree(data); return err; @@ -265,6 +273,7 @@ void nvme_hwmon_exit(struct nvme_ctrl *ctrl) hwmon_device_unregister(ctrl->hwmon_device); ctrl->hwmon_device = NULL; + kfree(data->log); kfree(data); } } From ae48bee2830bf216800e1447baca39541e27a12e Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 18 Oct 2022 14:31:59 +0800 Subject: [PATCH 0434/1477] net: sched: cake: fix null pointer access issue when cake_init() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 51f9a8921ceacd7bf0d3f47fa867a64988ba1dcb ] When the default qdisc is cake, if the qdisc of dev_queue fails to be inited during mqprio_init(), cake_reset() is invoked to clear resources. In this case, the tins is NULL, and it will cause gpf issue. The process is as follows: qdisc_create_dflt() cake_init() q->tins = kvcalloc(...) --->failed, q->tins is NULL ... qdisc_put() ... cake_reset() ... cake_dequeue_one() b = &q->tins[...] 
--->q->tins is NULL The following is the Call Trace information: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] RIP: 0010:cake_dequeue_one+0xc9/0x3c0 Call Trace: cake_reset+0xb1/0x140 qdisc_reset+0xed/0x6f0 qdisc_destroy+0x82/0x4c0 qdisc_put+0x9e/0xb0 qdisc_create_dflt+0x2c3/0x4a0 mqprio_init+0xa71/0x1760 qdisc_create+0x3eb/0x1000 tc_modify_qdisc+0x408/0x1720 rtnetlink_rcv_msg+0x38e/0xac0 netlink_rcv_skb+0x12d/0x3a0 netlink_unicast+0x4a2/0x740 netlink_sendmsg+0x826/0xcc0 sock_sendmsg+0xc5/0x100 ____sys_sendmsg+0x583/0x690 ___sys_sendmsg+0xe8/0x160 __sys_sendmsg+0xbf/0x160 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f89e5122d04 Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Zhengchao Shao Acked-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_cake.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index c580139fcede..5dc7a3c310c9 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2224,8 +2224,12 @@ retry: static void cake_reset(struct Qdisc *sch) { + struct cake_sched_data *q = qdisc_priv(sch); u32 c; + if (!q->tins) + return; + for (c = 0; c < CAKE_MAX_TINS; c++) cake_clear_tin(sch, c); } From 305aa36b628e2c7974fb23674a56b28b7224c2c2 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 24 Aug 2022 08:52:31 +0800 Subject: [PATCH 0435/1477] net: sched: delete duplicate cleanup of backlog and qlen [ Upstream commit c19d893fbf3f2f8fa864ae39652c7fee939edde2 ] qdisc_reset() is clearing qdisc->q.qlen and qdisc->qstats.backlog _after_ calling qdisc->ops->reset. There is no need to clear them again in the specific reset function. 
Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20220824005231.345727-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Stable-dep-of: 2a3fc78210b9 ("net: sched: sfb: fix null pointer access issue when sfb_init() fails") Signed-off-by: Sasha Levin --- include/net/sch_generic.h | 1 - net/sched/sch_atm.c | 1 - net/sched/sch_cbq.c | 1 - net/sched/sch_choke.c | 2 -- net/sched/sch_drr.c | 2 -- net/sched/sch_dsmark.c | 2 -- net/sched/sch_etf.c | 3 --- net/sched/sch_ets.c | 2 -- net/sched/sch_fq_codel.c | 2 -- net/sched/sch_fq_pie.c | 3 --- net/sched/sch_hfsc.c | 2 -- net/sched/sch_htb.c | 2 -- net/sched/sch_multiq.c | 1 - net/sched/sch_prio.c | 2 -- net/sched/sch_qfq.c | 2 -- net/sched/sch_red.c | 2 -- net/sched/sch_sfb.c | 2 -- net/sched/sch_skbprio.c | 3 --- net/sched/sch_taprio.c | 2 -- net/sched/sch_tbf.c | 2 -- net/sched/sch_teql.c | 1 - 21 files changed, 40 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index bed2387af456..e7e8c318925d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1178,7 +1178,6 @@ static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) static inline void qdisc_reset_queue(struct Qdisc *sch) { __qdisc_reset_queue(&sch->q); - sch->qstats.backlog = 0; } static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 1c281cc81f57..794c7377cd7e 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -575,7 +575,6 @@ static void atm_tc_reset(struct Qdisc *sch) pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); list_for_each_entry(flow, &p->flows, list) qdisc_reset(flow->q); - sch->q.qlen = 0; } static void atm_tc_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4a78fcf5d4f9..9a3dff02b7a2 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1053,7 +1053,6 @@ cbq_reset(struct Qdisc *sch) cl->cpriority = cl->priority; } } - 
sch->q.qlen = 0; } diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 2adbd945bf15..25d2daaa8122 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -315,8 +315,6 @@ static void choke_reset(struct Qdisc *sch) rtnl_qdisc_drop(skb, sch); } - sch->q.qlen = 0; - sch->qstats.backlog = 0; if (q->tab) memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); q->head = q->tail = 0; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index dde564670ad8..08424aac6da8 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -443,8 +443,6 @@ static void drr_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void drr_destroy_qdisc(struct Qdisc *sch) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 76ed1a05ded2..a75bc7f80cd7 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -408,8 +408,6 @@ static void dsmark_reset(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); if (p->q) qdisc_reset(p->q); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void dsmark_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index c48f91075b5c..d96103b0e2bf 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -445,9 +445,6 @@ static void etf_reset(struct Qdisc *sch) timesortedlist_clear(sch); __qdisc_reset_queue(&sch->q); - sch->qstats.backlog = 0; - sch->q.qlen = 0; - q->last = 0; } diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 9c224872ef03..05817c55692f 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -722,8 +722,6 @@ static void ets_qdisc_reset(struct Qdisc *sch) } for (band = 0; band < q->nbands; band++) qdisc_reset(q->classes[band].qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void ets_qdisc_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 99e8db262198..01d6eea5b0ce 100644 --- 
a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -347,8 +347,6 @@ static void fq_codel_reset(struct Qdisc *sch) codel_vars_init(&flow->cvars); } memset(q->backlogs, 0, q->flows_cnt * sizeof(u32)); - sch->q.qlen = 0; - sch->qstats.backlog = 0; q->memory_usage = 0; } diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index c70802785518..cf04f70e96bf 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -521,9 +521,6 @@ static void fq_pie_reset(struct Qdisc *sch) INIT_LIST_HEAD(&flow->flowchain); pie_vars_init(&flow->vars); } - - sch->q.qlen = 0; - sch->qstats.backlog = 0; } static void fq_pie_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d1902fca9844..cdc43a06aa9b 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1484,8 +1484,6 @@ hfsc_reset_qdisc(struct Qdisc *sch) } q->eligible = RB_ROOT; qdisc_watchdog_cancel(&q->watchdog); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index cd70dbcbd72f..c3ba018fd083 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -966,8 +966,6 @@ static void htb_reset(struct Qdisc *sch) } qdisc_watchdog_cancel(&q->watchdog); __qdisc_reset_queue(&q->direct_queue); - sch->q.qlen = 0; - sch->qstats.backlog = 0; memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); } diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 5c27b4270b90..1c6dbcfa89b8 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -152,7 +152,6 @@ multiq_reset(struct Qdisc *sch) for (band = 0; band < q->bands; band++) qdisc_reset(q->queues[band]); - sch->q.qlen = 0; q->curband = 0; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3eabb871a1d5..1c805fe05b82 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -135,8 +135,6 @@ prio_reset(struct Qdisc *sch) for (prio = 0; prio < q->bands; prio++) 
qdisc_reset(q->queues[prio]); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index af8c63a9ec18..1d1d81aeb389 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1458,8 +1458,6 @@ static void qfq_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void qfq_destroy_qdisc(struct Qdisc *sch) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 40adf1f07a82..f1e013e3f04a 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -176,8 +176,6 @@ static void red_reset(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; red_restart(&q->vars); } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index b2724057629f..0e1cb517b0d9 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -456,8 +456,6 @@ static void sfb_reset(struct Qdisc *sch) struct sfb_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; q->slot = 0; q->double_buffering = false; sfb_zero_all_buckets(q); diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 7a5e4c454715..df72fb83d9c7 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -213,9 +213,6 @@ static void skbprio_reset(struct Qdisc *sch) struct skbprio_sched_data *q = qdisc_priv(sch); int prio; - sch->qstats.backlog = 0; - sch->q.qlen = 0; - for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) __skb_queue_purge(&q->qdiscs[prio]); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index ab8835a72cee..7f33b31c7b8b 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1626,8 +1626,6 @@ static void taprio_reset(struct Qdisc *sch) if (q->qdiscs[i]) qdisc_reset(q->qdiscs[i]); } - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void taprio_destroy(struct 
Qdisc *sch) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 6eb17004a9e4..7461e5c67d50 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -316,8 +316,6 @@ static void tbf_reset(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; q->t_c = ktime_get_ns(); q->tokens = q->buffer; q->ptokens = q->mtu; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6af6b95bdb67..79aaab51cbf5 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -124,7 +124,6 @@ teql_reset(struct Qdisc *sch) struct teql_sched_data *dat = qdisc_priv(sch); skb_queue_purge(&dat->q); - sch->q.qlen = 0; } static void From ded86c4191a3c17f8200d17a7d8a6f63b74554ae Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 18 Oct 2022 14:32:01 +0800 Subject: [PATCH 0436/1477] net: sched: sfb: fix null pointer access issue when sfb_init() fails [ Upstream commit 2a3fc78210b9f0e85372a2435368962009f480fc ] When the default qdisc is sfb, if the qdisc of dev_queue fails to be inited during mqprio_init(), sfb_reset() is invoked to clear resources. In this case, the q->qdisc is NULL, and it will cause gpf issue. The process is as follows: qdisc_create_dflt() sfb_init() tcf_block_get() --->failed, q->qdisc is NULL ... qdisc_put() ... 
sfb_reset() qdisc_reset(q->qdisc) --->q->qdisc is NULL ops = qdisc->ops The following is the Call Trace information: general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] RIP: 0010:qdisc_reset+0x2b/0x6f0 Call Trace: sfb_reset+0x37/0xd0 qdisc_reset+0xed/0x6f0 qdisc_destroy+0x82/0x4c0 qdisc_put+0x9e/0xb0 qdisc_create_dflt+0x2c3/0x4a0 mqprio_init+0xa71/0x1760 qdisc_create+0x3eb/0x1000 tc_modify_qdisc+0x408/0x1720 rtnetlink_rcv_msg+0x38e/0xac0 netlink_rcv_skb+0x12d/0x3a0 netlink_unicast+0x4a2/0x740 netlink_sendmsg+0x826/0xcc0 sock_sendmsg+0xc5/0x100 ____sys_sendmsg+0x583/0x690 ___sys_sendmsg+0xe8/0x160 __sys_sendmsg+0xbf/0x160 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f2164122d04 Fixes: e13e02a3c68d ("net_sched: SFB flow scheduler") Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_sfb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 0e1cb517b0d9..9ded56228ea1 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -455,7 +455,8 @@ static void sfb_reset(struct Qdisc *sch) { struct sfb_sched_data *q = qdisc_priv(sch); - qdisc_reset(q->qdisc); + if (likely(q->qdisc)) + qdisc_reset(q->qdisc); q->slot = 0; q->double_buffering = false; sfb_zero_all_buckets(q); From 3032e316e0a9a04b9cc3ca73ff22c4d546aee243 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Tue, 18 Oct 2022 10:28:41 +0100 Subject: [PATCH 0437/1477] sfc: include vport_id in filter spec hash and equal() [ Upstream commit c2bf23e4a5af37a4d77901d9ff14c50a269f143d ] Filters on different vports are qualified by different implicit MACs and/or VLANs, so shouldn't be considered equal even if their other match fields are identical. 
Fixes: 7c460d9be610 ("sfc: Extend and abstract efx_filter_spec to cover Huntington/EF10") Co-developed-by: Edward Cree Signed-off-by: Edward Cree Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Martin Habets Link: https://lore.kernel.org/r/20221018092841.32206-1-pieter.jansen-van-vuuren@amd.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/filter.h | 3 ++- drivers/net/ethernet/sfc/rx_common.c | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h index 40b2af8bfb81..2ac3c8f1b04b 100644 --- a/drivers/net/ethernet/sfc/filter.h +++ b/drivers/net/ethernet/sfc/filter.h @@ -157,7 +157,8 @@ struct efx_filter_spec { u32 flags:6; u32 dmaq_id:12; u32 rss_context; - __be16 outer_vid __aligned(4); /* allow jhash2() of match values */ + u32 vport_id; + __be16 outer_vid; __be16 inner_vid; u8 loc_mac[ETH_ALEN]; u8 rem_mac[ETH_ALEN]; diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 2c09afac5beb..36b46ddb6710 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -676,17 +676,17 @@ bool efx_filter_spec_equal(const struct efx_filter_spec *left, (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) return false; - return memcmp(&left->outer_vid, &right->outer_vid, + return memcmp(&left->vport_id, &right->vport_id, sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) == 0; + offsetof(struct efx_filter_spec, vport_id)) == 0; } u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) { - BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); - return jhash2((const u32 *)&spec->outer_vid, + BUILD_BUG_ON(offsetof(struct efx_filter_spec, vport_id) & 3); + return jhash2((const u32 *)&spec->vport_id, (sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) / 4, + offsetof(struct efx_filter_spec, vport_id)) / 4, 0); } 
From 2974f3b330ef25f5d34a4948d04290c2cd7802cf Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 18 Oct 2022 20:24:51 +0800 Subject: [PATCH 0438/1477] net: hns: fix possible memory leak in hnae_ae_register() [ Upstream commit ff2f5ec5d009844ec28f171123f9e58750cef4bf ] Inject fault while probing module, if device_register() fails, but the refcount of kobject is not decreased to 0, the name allocated in dev_set_name() is leaked. Fix this by calling put_device(), so that name can be freed in callback function kobject_cleanup(). unreferenced object 0xffff00c01aba2100 (size 128): comm "systemd-udevd", pid 1259, jiffies 4294903284 (age 294.152s) hex dump (first 32 bytes): 68 6e 61 65 30 00 00 00 18 21 ba 1a c0 00 ff ff hnae0....!...... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000034783f26>] slab_post_alloc_hook+0xa0/0x3e0 [<00000000748188f2>] __kmem_cache_alloc_node+0x164/0x2b0 [<00000000ab0743e8>] __kmalloc_node_track_caller+0x6c/0x390 [<000000006c0ffb13>] kvasprintf+0x8c/0x118 [<00000000fa27bfe1>] kvasprintf_const+0x60/0xc8 [<0000000083e10ed7>] kobject_set_name_vargs+0x3c/0xc0 [<000000000b87affc>] dev_set_name+0x7c/0xa0 [<000000003fd8fe26>] hnae_ae_register+0xcc/0x190 [hnae] [<00000000fe97edc9>] hns_dsaf_ae_init+0x9c/0x108 [hns_dsaf] [<00000000c36ff1eb>] hns_dsaf_probe+0x548/0x748 [hns_dsaf] Fixes: 6fe6611ff275 ("net: add Hisilicon Network Subsystem hnae framework support") Signed-off-by: Yang Yingliang Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221018122451.1749171-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hnae.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index 00fafc0f8512..430eccea8e5e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -419,8 +419,10 @@ int 
hnae_ae_register(struct hnae_ae_dev *hdev, struct module *owner) hdev->cls_dev.release = hnae_release; (void)dev_set_name(&hdev->cls_dev, "hnae%d", hdev->id); ret = device_register(&hdev->cls_dev); - if (ret) + if (ret) { + put_device(&hdev->cls_dev); return ret; + } __module_get(THIS_MODULE); From 7aa3d623c11b9ab60f86b7833666e5d55bac4be9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Oct 2022 20:32:58 +0000 Subject: [PATCH 0439/1477] net: sched: fix race condition in qdisc_graft() [ Upstream commit ebda44da44f6f309d302522b049f43d6f829f7aa ] We had one syzbot report [1] in syzbot queue for a while. I was waiting for more occurrences and/or a repro but Dmitry Vyukov spotted the issue right away. qdisc_graft() drops reference to qdisc in notify_and_destroy while it's still assigned to dev->qdisc Indeed, RCU rules are clear when replacing a data structure. The visible pointer (dev->qdisc in this case) must be updated to the new object _before_ RCU grace period is started (qdisc_put(old) in this case). 
[1] BUG: KASAN: use-after-free in __tcf_qdisc_find.part.0+0xa3a/0xac0 net/sched/cls_api.c:1066 Read of size 4 at addr ffff88802065e038 by task syz-executor.4/21027 CPU: 0 PID: 21027 Comm: syz-executor.4 Not tainted 6.0.0-rc3-syzkaller-00363-g7726d4c3e60b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:317 [inline] print_report.cold+0x2ba/0x719 mm/kasan/report.c:433 kasan_report+0xb1/0x1e0 mm/kasan/report.c:495 __tcf_qdisc_find.part.0+0xa3a/0xac0 net/sched/cls_api.c:1066 __tcf_qdisc_find net/sched/cls_api.c:1051 [inline] tc_new_tfilter+0x34f/0x2200 net/sched/cls_api.c:2018 rtnetlink_rcv_msg+0x955/0xca0 net/core/rtnetlink.c:6081 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f5efaa89279 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f5efbc31168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f5efab9bf80 RCX: 00007f5efaa89279 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000005 RBP: 00007f5efaae32e9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 
00007f5efb0cfb1f R14: 00007f5efbc31300 R15: 0000000000022000 Allocated by task 21027: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:45 [inline] set_alloc_info mm/kasan/common.c:437 [inline] ____kasan_kmalloc mm/kasan/common.c:516 [inline] ____kasan_kmalloc mm/kasan/common.c:475 [inline] __kasan_kmalloc+0xa9/0xd0 mm/kasan/common.c:525 kmalloc_node include/linux/slab.h:623 [inline] kzalloc_node include/linux/slab.h:744 [inline] qdisc_alloc+0xb0/0xc50 net/sched/sch_generic.c:938 qdisc_create_dflt+0x71/0x4a0 net/sched/sch_generic.c:997 attach_one_default_qdisc net/sched/sch_generic.c:1152 [inline] netdev_for_each_tx_queue include/linux/netdevice.h:2437 [inline] attach_default_qdiscs net/sched/sch_generic.c:1170 [inline] dev_activate+0x760/0xcd0 net/sched/sch_generic.c:1229 __dev_open+0x393/0x4d0 net/core/dev.c:1441 __dev_change_flags+0x583/0x750 net/core/dev.c:8556 rtnl_configure_link+0xee/0x240 net/core/rtnetlink.c:3189 rtnl_newlink_create net/core/rtnetlink.c:3371 [inline] __rtnl_newlink+0x10b8/0x17e0 net/core/rtnetlink.c:3580 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3593 rtnetlink_rcv_msg+0x43a/0xca0 net/core/rtnetlink.c:6090 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 21020: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:45 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:367 
[inline] ____kasan_slab_free+0x166/0x1c0 mm/kasan/common.c:329 kasan_slab_free include/linux/kasan.h:200 [inline] slab_free_hook mm/slub.c:1754 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1780 slab_free mm/slub.c:3534 [inline] kfree+0xe2/0x580 mm/slub.c:4562 rcu_do_batch kernel/rcu/tree.c:2245 [inline] rcu_core+0x7b5/0x1890 kernel/rcu/tree.c:2505 __do_softirq+0x1d3/0x9c6 kernel/softirq.c:571 Last potentially related work creation: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 __kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348 call_rcu+0x99/0x790 kernel/rcu/tree.c:2793 qdisc_put+0xcd/0xe0 net/sched/sch_generic.c:1083 notify_and_destroy net/sched/sch_api.c:1012 [inline] qdisc_graft+0xeb1/0x1270 net/sched/sch_api.c:1084 tc_modify_qdisc+0xbb7/0x1a00 net/sched/sch_api.c:1671 rtnetlink_rcv_msg+0x43a/0xca0 net/core/rtnetlink.c:6090 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Second to last potentially related work creation: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 __kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348 kvfree_call_rcu+0x74/0x940 kernel/rcu/tree.c:3322 neigh_destroy+0x431/0x630 net/core/neighbour.c:912 neigh_release include/net/neighbour.h:454 [inline] neigh_cleanup_and_release+0x1f8/0x330 net/core/neighbour.c:103 neigh_del net/core/neighbour.c:225 [inline] neigh_remove_one+0x37d/0x460 net/core/neighbour.c:246 neigh_forced_gc net/core/neighbour.c:276 [inline] neigh_alloc 
net/core/neighbour.c:447 [inline] ___neigh_create+0x18b5/0x29a0 net/core/neighbour.c:642 ip6_finish_output2+0xfb8/0x1520 net/ipv6/ip6_output.c:125 __ip6_finish_output net/ipv6/ip6_output.c:195 [inline] ip6_finish_output+0x690/0x1160 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x1ed/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:451 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] mld_sendpack+0xa09/0xe70 net/ipv6/mcast.c:1820 mld_send_cr net/ipv6/mcast.c:2121 [inline] mld_ifc_work+0x71c/0xdc0 net/ipv6/mcast.c:2653 process_one_work+0x991/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e4/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 The buggy address belongs to the object at ffff88802065e000 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 56 bytes inside of 1024-byte region [ffff88802065e000, ffff88802065e400) The buggy address belongs to the physical page: page:ffffea0000819600 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x20658 head:ffffea0000819600 order:3 compound_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 0000000000000000 dead000000000001 ffff888011841dc0 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 3523, tgid 3523 (sshd), ts 41495190986, free_ts 41417713212 prep_new_page mm/page_alloc.c:2532 [inline] get_page_from_freelist+0x109b/0x2ce0 mm/page_alloc.c:4283 __alloc_pages+0x1c7/0x510 mm/page_alloc.c:5515 alloc_pages+0x1a6/0x270 mm/mempolicy.c:2270 alloc_slab_page mm/slub.c:1824 [inline] 
allocate_slab+0x27e/0x3d0 mm/slub.c:1969 new_slab mm/slub.c:2029 [inline] ___slab_alloc+0x7f1/0xe10 mm/slub.c:3031 __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3118 slab_alloc_node mm/slub.c:3209 [inline] __kmalloc_node_track_caller+0x2f2/0x380 mm/slub.c:4955 kmalloc_reserve net/core/skbuff.c:358 [inline] __alloc_skb+0xd9/0x2f0 net/core/skbuff.c:430 alloc_skb_fclone include/linux/skbuff.h:1307 [inline] tcp_stream_alloc_skb+0x38/0x580 net/ipv4/tcp.c:861 tcp_sendmsg_locked+0xc36/0x2f80 net/ipv4/tcp.c:1325 tcp_sendmsg+0x2b/0x40 net/ipv4/tcp.c:1483 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 sock_write_iter+0x291/0x3d0 net/socket.c:1108 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9e9/0xdd0 fs/read_write.c:578 ksys_write+0x1e8/0x250 fs/read_write.c:631 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1449 [inline] free_pcp_prepare+0x5e4/0xd20 mm/page_alloc.c:1499 free_unref_page_prepare mm/page_alloc.c:3380 [inline] free_unref_page+0x19/0x4d0 mm/page_alloc.c:3476 __unfreeze_partials+0x17c/0x1a0 mm/slub.c:2548 qlink_free mm/kasan/quarantine.c:168 [inline] qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187 kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:294 __kasan_slab_alloc+0xa2/0xc0 mm/kasan/common.c:447 kasan_slab_alloc include/linux/kasan.h:224 [inline] slab_post_alloc_hook mm/slab.h:727 [inline] slab_alloc_node mm/slub.c:3243 [inline] slab_alloc mm/slub.c:3251 [inline] __kmem_cache_alloc_lru mm/slub.c:3258 [inline] kmem_cache_alloc+0x267/0x3b0 mm/slub.c:3268 kmem_cache_zalloc include/linux/slab.h:723 [inline] alloc_buffer_head+0x20/0x140 fs/buffer.c:2974 alloc_page_buffers+0x280/0x790 fs/buffer.c:829 create_empty_buffers+0x2c/0xee0 fs/buffer.c:1558 ext4_block_write_begin+0x1004/0x1530 fs/ext4/inode.c:1074 ext4_da_write_begin+0x422/0xae0 
fs/ext4/inode.c:2996 generic_perform_write+0x246/0x560 mm/filemap.c:3738 ext4_buffered_write_iter+0x15b/0x460 fs/ext4/file.c:270 ext4_file_write_iter+0x44a/0x1660 fs/ext4/file.c:679 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9e9/0xdd0 fs/read_write.c:578 Fixes: af356afa010f ("net_sched: reintroduce dev->qdisc for use by sch_api") Reported-by: syzbot Diagnosed-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20221018203258.2793282-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6e18aa417782..d8ffe4114385 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1081,12 +1081,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, skip: if (!ingress) { - notify_and_destroy(net, skb, n, classid, - rtnl_dereference(dev->qdisc), new); + old = rtnl_dereference(dev->qdisc); if (new && !new->ops->attach) qdisc_refcount_inc(new); rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc); + notify_and_destroy(net, skb, n, classid, old, new); + if (new && new->ops->attach) new->ops->attach(new); } else { From 35c92435be76acdd544c834ae734b88d08e91e0e Mon Sep 17 00:00:00 2001 From: Felix Riemann Date: Tue, 18 Oct 2022 12:47:54 +0200 Subject: [PATCH 0440/1477] net: phy: dp83822: disable MDI crossover status change interrupt [ Upstream commit 7f378c03aa4952507521174fb0da7b24a9ad0be6 ] If the cable is disconnected the PHY seems to toggle between MDI and MDI-X modes. With the MDI crossover status interrupt active this causes roughly 10 interrupts per second. As the crossover status isn't checked by the driver, the interrupt can be disabled to reduce the interrupt load. 
Fixes: 87461f7a58ab ("net: phy: DP83822 initial driver submission") Signed-off-by: Felix Riemann Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221018104755.30025-1-svc.sw.rte.linux@sma.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/dp83822.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 3a8849716459..db651649e0b8 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -268,8 +268,7 @@ static int dp83822_config_intr(struct phy_device *phydev) DP83822_EEE_ERROR_CHANGE_INT_EN); if (!dp83822->fx_enabled) - misr_status |= DP83822_MDI_XOVER_INT_EN | - DP83822_ANEG_ERR_INT_EN | + misr_status |= DP83822_ANEG_ERR_INT_EN | DP83822_WOL_PKT_INT_EN; err = phy_write(phydev, MII_DP83822_MISR2, misr_status); From ef11e8ec00b976a30131751cca1792ac6e87e95c Mon Sep 17 00:00:00 2001 From: Charlotte Tan Date: Wed, 19 Oct 2022 08:44:46 +0800 Subject: [PATCH 0441/1477] iommu/vt-d: Allow NVS regions in arch_rmrr_sanity_check() [ Upstream commit 5566e68d829f5d87670d5984c1c2ccb4c518405f ] arch_rmrr_sanity_check() warns if the RMRR is not covered by an ACPI Reserved region, but it seems like it should accept an NVS region as well. The ACPI spec https://uefi.org/specs/ACPI/6.5/15_System_Address_Map_Interfaces.html uses similar wording for "Reserved" and "NVS" region types; for NVS regions it says "This range of addresses is in use or reserved by the system and must not be used by the operating system." There is an old comment on this mailing list that also suggests NVS regions should pass the arch_rmrr_sanity_check() test: The warnings come from arch_rmrr_sanity_check() since it checks whether the region is E820_TYPE_RESERVED. However, if the purpose of the check is to detect RMRR has regions that may be used by OS as free memory, isn't E820_TYPE_NVS safe, too? 
This patch overlaps with another proposed patch that would add the region type to the log since sometimes the bug reporter sees this log on the console but doesn't know to include the kernel log: https://lore.kernel.org/lkml/20220611204859.234975-3-atomlin@redhat.com/ Here's an example of the "Firmware Bug" apparent false positive (wrapped for line length): DMAR: [Firmware Bug]: No firmware reserved region can cover this RMRR [0x000000006f760000-0x000000006f762fff], contact BIOS vendor for fixes DMAR: [Firmware Bug]: Your BIOS is broken; bad RMRR [0x000000006f760000-0x000000006f762fff] This is the snippet from the e820 table: BIOS-e820: [mem 0x0000000068bff000-0x000000006ebfefff] reserved BIOS-e820: [mem 0x000000006ebff000-0x000000006f9fefff] ACPI NVS BIOS-e820: [mem 0x000000006f9ff000-0x000000006fffefff] ACPI data Fixes: f036c7fa0ab6 ("iommu/vt-d: Check VT-d RMRR region in BIOS is reported as reserved") Cc: Will Mortensen Link: https://lore.kernel.org/linux-iommu/64a5843d-850d-e58c-4fc2-0a0eeeb656dc@nec.com/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=216443 Signed-off-by: Charlotte Tan Reviewed-by: Aaron Tomlin Link: https://lore.kernel.org/r/20220929044449.32515-1-charlotte@extrahop.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- arch/x86/include/asm/iommu.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index bf1ed2ddc74b..7a983119bc40 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -17,8 +17,10 @@ arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) { u64 start = rmrr->base_address; u64 end = rmrr->end_address + 1; + int entry_type; - if (e820__mapped_all(start, end, E820_TYPE_RESERVED)) + entry_type = e820__get_entry_type(start, end); + if (entry_type == E820_TYPE_RESERVED || entry_type == E820_TYPE_NVS) return 0; pr_err(FW_BUG "No firmware reserved region can cover this RMRR 
[%#018Lx-%#018Lx], contact BIOS vendor for fixes\n", From d74196bb278b8f8af88e16bd595997dfa3d6fdb0 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Wed, 19 Oct 2022 08:44:47 +0800 Subject: [PATCH 0442/1477] iommu/vt-d: Clean up si_domain in the init_dmars() error path [ Upstream commit 620bf9f981365c18cc2766c53d92bf8131c63f32 ] A splat from kmem_cache_destroy() was seen with a kernel prior to commit ee2653bbe89d ("iommu/vt-d: Remove domain and devinfo mempool") when there was a failure in init_dmars(), because the iommu_domain cache still had objects. While the mempool code is now gone, there still is a leak of the si_domain memory if init_dmars() fails. So clean up si_domain in the init_dmars() error path. Cc: Lu Baolu Cc: Joerg Roedel Cc: Will Deacon Cc: Robin Murphy Fixes: 86080ccc223a ("iommu/vt-d: Allocate si_domain in init_dmars()") Signed-off-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20221010144842.308890-1-jsnitsel@redhat.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/iommu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index c48cf737b521..f23329b7f97c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2846,6 +2846,7 @@ static int __init si_domain_init(int hw) if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { domain_exit(si_domain); + si_domain = NULL; return -EFAULT; } @@ -3505,6 +3506,10 @@ free_iommu: disable_dmar_iommu(iommu); free_dmar_iommu(iommu); } + if (si_domain) { + domain_exit(si_domain); + si_domain = NULL; + } kfree(g_iommus); From 1414e9bf3c307759347e6c91ed1143065ee86402 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 30 Jun 2022 23:07:22 +0300 Subject: [PATCH 0443/1477] drm/virtio: Use appropriate atomic state in virtio_gpu_plane_cleanup_fb() [ Upstream commit 4656b3a26a9e9fe5f04bfd2ab55b066266ba7f4d ] Make virtio_gpu_plane_cleanup_fb() to clean the 
state which DRM core wants to clean up and not the current plane's state. Normally the older atomic state is cleaned up, but the newer state could also be cleaned up in case of aborted commits. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Osipenko Link: http://patchwork.freedesktop.org/patch/msgid/20220630200726.1884320-6-dmitry.osipenko@collabora.com Signed-off-by: Gerd Hoffmann Signed-off-by: Sasha Levin --- drivers/gpu/drm/virtio/virtgpu_plane.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index 6a311cd93440..e6de62734269 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -213,14 +213,14 @@ static int virtio_gpu_cursor_prepare_fb(struct drm_plane *plane, } static void virtio_gpu_cursor_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) + struct drm_plane_state *state) { struct virtio_gpu_framebuffer *vgfb; - if (!plane->state->fb) + if (!state->fb) return; - vgfb = to_virtio_gpu_framebuffer(plane->state->fb); + vgfb = to_virtio_gpu_framebuffer(state->fb); if (vgfb->fence) { dma_fence_put(&vgfb->fence->f); vgfb->fence = NULL; From 1da5d249704666fe8d3b1cb531d73fb6c621d84f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 23 Nov 2020 16:30:51 -0300 Subject: [PATCH 0444/1477] dmaengine: mxs-dma: Remove the unused .id_table [ Upstream commit cc2afb0d4c7cbba6743ed6d9564f0883cab6bae1 ] The mxs-dma driver is only used by DT platforms and the .id_table is unused. Get rid of it to simplify the code. 
Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20201123193051.17285-1-festevam@gmail.com Signed-off-by: Vinod Koul Stable-dep-of: 26696d465716 ("dmaengine: mxs: use platform_driver_register") Signed-off-by: Sasha Levin --- drivers/dma/mxs-dma.c | 37 +++++-------------------------------- 1 file changed, 5 insertions(+), 32 deletions(-) diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index 65f816b40c32..994fc4d2aca4 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -167,29 +167,11 @@ static struct mxs_dma_type mxs_dma_types[] = { } }; -static const struct platform_device_id mxs_dma_ids[] = { - { - .name = "imx23-dma-apbh", - .driver_data = (kernel_ulong_t) &mxs_dma_types[0], - }, { - .name = "imx23-dma-apbx", - .driver_data = (kernel_ulong_t) &mxs_dma_types[1], - }, { - .name = "imx28-dma-apbh", - .driver_data = (kernel_ulong_t) &mxs_dma_types[2], - }, { - .name = "imx28-dma-apbx", - .driver_data = (kernel_ulong_t) &mxs_dma_types[3], - }, { - /* end of list */ - } -}; - static const struct of_device_id mxs_dma_dt_ids[] = { - { .compatible = "fsl,imx23-dma-apbh", .data = &mxs_dma_ids[0], }, - { .compatible = "fsl,imx23-dma-apbx", .data = &mxs_dma_ids[1], }, - { .compatible = "fsl,imx28-dma-apbh", .data = &mxs_dma_ids[2], }, - { .compatible = "fsl,imx28-dma-apbx", .data = &mxs_dma_ids[3], }, + { .compatible = "fsl,imx23-dma-apbh", .data = &mxs_dma_types[0], }, + { .compatible = "fsl,imx23-dma-apbx", .data = &mxs_dma_types[1], }, + { .compatible = "fsl,imx28-dma-apbh", .data = &mxs_dma_types[2], }, + { .compatible = "fsl,imx28-dma-apbx", .data = &mxs_dma_types[3], }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, mxs_dma_dt_ids); @@ -762,8 +744,6 @@ static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec, static int __init mxs_dma_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; - const struct platform_device_id *id_entry; - const struct of_device_id *of_id; const struct mxs_dma_type 
*dma_type; struct mxs_dma_engine *mxs_dma; struct resource *iores; @@ -779,13 +759,7 @@ static int __init mxs_dma_probe(struct platform_device *pdev) return ret; } - of_id = of_match_device(mxs_dma_dt_ids, &pdev->dev); - if (of_id) - id_entry = of_id->data; - else - id_entry = platform_get_device_id(pdev); - - dma_type = (struct mxs_dma_type *)id_entry->driver_data; + dma_type = (struct mxs_dma_type *)of_device_get_match_data(&pdev->dev); mxs_dma->type = dma_type->type; mxs_dma->dev_id = dma_type->id; @@ -865,7 +839,6 @@ static struct platform_driver mxs_dma_driver = { .name = "mxs-dma", .of_match_table = mxs_dma_dt_ids, }, - .id_table = mxs_dma_ids, }; static int __init mxs_dma_module_init(void) From 4e3a15ca24b33961da73d54ebfd4a3fd8bcc7bd7 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Wed, 21 Sep 2022 19:05:56 +0200 Subject: [PATCH 0445/1477] dmaengine: mxs: use platform_driver_register [ Upstream commit 26696d4657167112a1079f86cba1739765c1360e ] Driver registration fails on SOC imx8mn as its supplier, the clock control module, is probed later than subsys initcall level. This driver uses platform_driver_probe which is not compatible with deferred probing and won't be probed again later if probe function fails due to clock not being available at that time. This patch replaces the use of platform_driver_probe with platform_driver_register which will allow probing the driver later again when the clock control module will be available. The __init annotation has been dropped because it is not compatible with deferred probing. The code is not executed once and its memory cannot be freed. 
Fixes: a580b8c5429a ("dmaengine: mxs-dma: add dma support for i.MX23/28") Co-developed-by: Michael Trimarchi Signed-off-by: Michael Trimarchi Signed-off-by: Dario Binacchi Acked-by: Sascha Hauer Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220921170556.1055962-1-dario.binacchi@amarulasolutions.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/mxs-dma.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index 994fc4d2aca4..dc147cc2436e 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -670,7 +670,7 @@ static enum dma_status mxs_dma_tx_status(struct dma_chan *chan, return mxs_chan->status; } -static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma) +static int mxs_dma_init(struct mxs_dma_engine *mxs_dma) { int ret; @@ -741,7 +741,7 @@ static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec, ofdma->of_node); } -static int __init mxs_dma_probe(struct platform_device *pdev) +static int mxs_dma_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; const struct mxs_dma_type *dma_type; @@ -839,10 +839,7 @@ static struct platform_driver mxs_dma_driver = { .name = "mxs-dma", .of_match_table = mxs_dma_dt_ids, }, + .probe = mxs_dma_probe, }; -static int __init mxs_dma_module_init(void) -{ - return platform_driver_probe(&mxs_dma_driver, mxs_dma_probe); -} -subsys_initcall(mxs_dma_module_init); +builtin_platform_driver(mxs_dma_driver); From bd6af07e7993e304679ac21781d2d39ecdf97de0 Mon Sep 17 00:00:00 2001 From: sunliming Date: Thu, 2 Jun 2022 22:06:13 +0800 Subject: [PATCH 0446/1477] tracing: Simplify conditional compilation code in tracing_set_tracer() [ Upstream commit f4b0d318097e45cbac5e14976f8bb56aa2cef504 ] Two conditional compilation directives "#ifdef CONFIG_TRACER_MAX_TRACE" are used consecutively, and no other code in between. 
Simplify the conditional compilation code and only use one "#ifdef CONFIG_TRACER_MAX_TRACE". Link: https://lkml.kernel.org/r/20220602140613.545069-1-sunliming@kylinos.cn Signed-off-by: sunliming Signed-off-by: Steven Rostedt (Google) Stable-dep-of: a541a9559bb0 ("tracing: Do not free snapshot if tracer is on cmdline") Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a5245362ce7a..870033f9c198 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6025,9 +6025,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) synchronize_rcu(); free_snapshot(tr); } -#endif -#ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) From ea82edad0aee6e080ec6dcdd200c3eade374d5bb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 5 Oct 2022 11:37:57 -0400 Subject: [PATCH 0447/1477] tracing: Do not free snapshot if tracer is on cmdline [ Upstream commit a541a9559bb0a8ecc434de01d3e4826c32e8bb53 ] The ftrace_boot_snapshot and alloc_snapshot cmdline options allocate the snapshot buffer at boot up for use later. The ftrace_boot_snapshot in particular requires the snapshot to be allocated because it will take a snapshot at the end of boot up allowing to see the traces that happened during boot so that it's not lost when user space takes over. When a tracer is registered (started) there's a path that checks if it requires the snapshot buffer or not, and if it does not and it was allocated it will do a synchronization and free the snapshot buffer. This is only required if the previous tracer was using it for "max latency" snapshots, as it needs to make sure all max snapshots are complete before freeing. But this is only needed if the previous tracer was using the snapshot buffer for latency (like irqoff tracer and friends).
But it does not make sense to free it, if the previous tracer was not using it, and the snapshot was allocated by the cmdline parameters. This basically takes away the point of allocating it in the first place! Note, the allocated snapshot worked fine for just trace events, but fails when a tracer is enabled on the cmdline. Further investigation, this goes back even further and it does not require a tracer on the cmdline to fail. Simply enable snapshots and then enable a tracer, and it will remove the snapshot. Link: https://lkml.kernel.org/r/20221005113757.041df7fe@gandalf.local.home Cc: Masami Hiramatsu Cc: Andrew Morton Cc: stable@vger.kernel.org Fixes: 45ad21ca5530 ("tracing: Have trace_array keep track if snapshot buffer is allocated") Reported-by: Ross Zwisler Tested-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 870033f9c198..b7cb9147f0c5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6008,12 +6008,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) if (tr->current_trace->reset) tr->current_trace->reset(tr); +#ifdef CONFIG_TRACER_MAX_TRACE + had_max_tr = tr->current_trace->use_max_tr; + /* Current trace needs to be nop_trace before synchronize_rcu */ tr->current_trace = &nop_trace; -#ifdef CONFIG_TRACER_MAX_TRACE - had_max_tr = tr->allocated_snapshot; - if (had_max_tr && !t->use_max_tr) { /* * We need to make sure that the update_max_tr sees that @@ -6026,11 +6026,13 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) free_snapshot(tr); } - if (t->use_max_tr && !had_max_tr) { + if (t->use_max_tr && !tr->allocated_snapshot) { ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) goto out; } +#else + tr->current_trace = &nop_trace; #endif if (t->init) { From 5232411f37d79654db574ff7357c5acc4620f173 Mon Sep 17 00:00:00 2001 
From: Juergen Gross Date: Fri, 30 Jul 2021 09:18:04 +0200 Subject: [PATCH 0448/1477] xen: assume XENFEAT_gnttab_map_avail_bits being set for pv guests [ Upstream commit 30dcc56bba911db561c35d4131baf983a41023f8 ] XENFEAT_gnttab_map_avail_bits is always set in Xen 4.0 and newer. Remove coding assuming it might be zero. Signed-off-by: Juergen Gross Acked-by: Peter Zijlstra (Intel) Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210730071804.4302-4-jgross@suse.com Signed-off-by: Juergen Gross Stable-dep-of: 5c13a4a0291b ("xen/gntdev: Accommodate VMA splitting") Signed-off-by: Sasha Levin --- drivers/xen/gntdev.c | 36 ++---------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 54fee4087bf1..5dd9d1ac755e 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -289,20 +289,13 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) { struct gntdev_grant_map *map = data; unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; - int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte; + int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte | + (1 << _GNTMAP_guest_avail0); u64 pte_maddr; BUG_ON(pgnr >= map->count); pte_maddr = arbitrary_virt_to_machine(pte).maddr; - /* - * Set the PTE as special to force get_user_pages_fast() fall - * back to the slow path. If this is not supported as part of - * the grant map, it will be done afterwards. 
- */ - if (xen_feature(XENFEAT_gnttab_map_avail_bits)) - flags |= (1 << _GNTMAP_guest_avail0); - gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, map->grants[pgnr].ref, map->grants[pgnr].domid); @@ -311,14 +304,6 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) return 0; } -#ifdef CONFIG_X86 -static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data) -{ - set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte)); - return 0; -} -#endif - int gntdev_map_grant_pages(struct gntdev_grant_map *map) { size_t alloced = 0; @@ -1102,23 +1087,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) err = vm_map_pages_zero(vma, map->pages, map->count); if (err) goto out_put_map; - } else { -#ifdef CONFIG_X86 - /* - * If the PTEs were not made special by the grant map - * hypercall, do so here. - * - * This is racy since the mapping is already visible - * to userspace but userspace should be well-behaved - * enough to not touch it until the mmap() call - * returns. - */ - if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) { - apply_to_page_range(vma->vm_mm, vma->vm_start, - vma->vm_end - vma->vm_start, - set_grant_ptes_as_special, NULL); - } -#endif } return 0; From 3c6a888e352283a14f37b9b433cd598a1a3a7dd0 Mon Sep 17 00:00:00 2001 From: "M. Vefa Bicakci" Date: Sun, 2 Oct 2022 18:20:06 -0400 Subject: [PATCH 0449/1477] xen/gntdev: Accommodate VMA splitting [ Upstream commit 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c ] Prior to this commit, the gntdev driver code did not handle the following scenario correctly with paravirtualized (PV) Xen domains: * User process sets up a gntdev mapping composed of two grant mappings (i.e., two pages shared by another Xen domain). * User process munmap()s one of the pages. * User process munmap()s the remaining page. * User process exits. 
In the scenario above, the user process would cause the kernel to log the following messages in dmesg for the first munmap(), and the second munmap() call would result in similar log messages: BUG: Bad page map in process doublemap.test pte:... pmd:... page:0000000057c97bff refcount:1 mapcount:-1 \ mapping:0000000000000000 index:0x0 pfn:... ... page dumped because: bad pte ... file:gntdev fault:0x0 mmap:gntdev_mmap [xen_gntdev] readpage:0x0 ... Call Trace: dump_stack_lvl+0x46/0x5e print_bad_pte.cold+0x66/0xb6 unmap_page_range+0x7e5/0xdc0 unmap_vmas+0x78/0xf0 unmap_region+0xa8/0x110 __do_munmap+0x1ea/0x4e0 __vm_munmap+0x75/0x120 __x64_sys_munmap+0x28/0x40 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x61/0xcb ... For each munmap() call, the Xen hypervisor (if built with CONFIG_DEBUG) would print out the following and trigger a general protection fault in the affected Xen PV domain: (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ... (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ... As of this writing, gntdev_grant_map structure's vma field (referred to as map->vma below) is mainly used for checking the start and end addresses of mappings. However, with split VMAs, these may change, and there could be more than one VMA associated with a gntdev mapping. Hence, remove the use of map->vma and rely on map->pages_vm_start for the original start address and on (map->count << PAGE_SHIFT) for the original mapping size. Let the invalidate() and find_special_page() hooks use these. Also, given that there can be multiple VMAs associated with a gntdev mapping, move the "mmu_interval_notifier_remove(&map->notifier)" call to the end of gntdev_put_map, so that the MMU notifier is only removed after the closing of the last remaining VMA. Finally, use an atomic to prevent inadvertent gntdev mapping re-use, instead of using the map->live_grants atomic counter and/or the map->vma pointer (the latter of which is now removed). 
This prevents the userspace from mmap()'ing (with MAP_FIXED) a gntdev mapping over the same address range as a previously set up gntdev mapping. This scenario can be summarized with the following call-trace, which was valid prior to this commit: mmap gntdev_mmap mmap (repeat mmap with MAP_FIXED over the same address range) gntdev_invalidate unmap_grant_pages (sets 'being_removed' entries to true) gnttab_unmap_refs_async unmap_single_vma gntdev_mmap (maps the shared pages again) munmap gntdev_invalidate unmap_grant_pages (no-op because 'being_removed' entries are true) unmap_single_vma (For PV domains, Xen reports that a granted page is being unmapped and triggers a general protection fault in the affected domain, if Xen was built with CONFIG_DEBUG) The fix for this last scenario could be worth its own commit, but we opted for a single commit, because removing the gntdev_grant_map structure's vma field requires guarding the entry to gntdev_mmap(), and the live_grants atomic counter is not sufficient on its own to prevent the mmap() over a pre-existing mapping. Link: https://github.com/QubesOS/qubes-issues/issues/7631 Fixes: ab31523c2fca ("xen/gntdev: allow usermode to map granted pages") Cc: stable@vger.kernel.org Signed-off-by: M. 
Vefa Bicakci Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20221002222006.2077-3-m.v.b@runbox.com Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/gntdev-common.h | 3 +- drivers/xen/gntdev.c | 58 ++++++++++++++++--------------------- 2 files changed, 27 insertions(+), 34 deletions(-) diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h index 40ef379c28ab..9c286b2a1900 100644 --- a/drivers/xen/gntdev-common.h +++ b/drivers/xen/gntdev-common.h @@ -44,9 +44,10 @@ struct gntdev_unmap_notify { }; struct gntdev_grant_map { + atomic_t in_use; struct mmu_interval_notifier notifier; + bool notifier_init; struct list_head next; - struct vm_area_struct *vma; int index; int count; int flags; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 5dd9d1ac755e..ff195b571763 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -276,6 +276,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) */ } + if (use_ptemod && map->notifier_init) + mmu_interval_notifier_remove(&map->notifier); + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); evtchn_put(map->notify.event); @@ -288,7 +291,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) { struct gntdev_grant_map *map = data; - unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; + unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT; int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte | (1 << _GNTMAP_guest_avail0); u64 pte_maddr; @@ -478,11 +481,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma) struct gntdev_priv *priv = file->private_data; pr_debug("gntdev_vma_close %p\n", vma); - if (use_ptemod) { - WARN_ON(map->vma != vma); - mmu_interval_notifier_remove(&map->notifier); - map->vma = NULL; - } + vma->vm_private_data = NULL; 
gntdev_put_map(priv, map); } @@ -510,29 +509,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, struct gntdev_grant_map *map = container_of(mn, struct gntdev_grant_map, notifier); unsigned long mstart, mend; + unsigned long map_start, map_end; if (!mmu_notifier_range_blockable(range)) return false; + map_start = map->pages_vm_start; + map_end = map->pages_vm_start + (map->count << PAGE_SHIFT); + /* * If the VMA is split or otherwise changed the notifier is not * updated, but we don't want to process VA's outside the modified * VMA. FIXME: It would be much more understandable to just prevent * modifying the VMA in the first place. */ - if (map->vma->vm_start >= range->end || - map->vma->vm_end <= range->start) + if (map_start >= range->end || map_end <= range->start) return true; - mstart = max(range->start, map->vma->vm_start); - mend = min(range->end, map->vma->vm_end); + mstart = max(range->start, map_start); + mend = min(range->end, map_end); pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", - map->index, map->count, - map->vma->vm_start, map->vma->vm_end, - range->start, range->end, mstart, mend); - unmap_grant_pages(map, - (mstart - map->vma->vm_start) >> PAGE_SHIFT, - (mend - mstart) >> PAGE_SHIFT); + map->index, map->count, map_start, map_end, + range->start, range->end, mstart, mend); + unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT, + (mend - mstart) >> PAGE_SHIFT); return true; } @@ -1012,18 +1012,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) return -EINVAL; pr_debug("map %d+%d at %lx (pgoff %lx)\n", - index, count, vma->vm_start, vma->vm_pgoff); + index, count, vma->vm_start, vma->vm_pgoff); mutex_lock(&priv->lock); map = gntdev_find_map_index(priv, index, count); if (!map) goto unlock_out; - if (use_ptemod && map->vma) + if (!atomic_add_unless(&map->in_use, 1, 1)) goto unlock_out; - if (atomic_read(&map->live_grants)) { - err = -EAGAIN; - goto unlock_out; - } + 
refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; @@ -1044,15 +1041,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) map->flags |= GNTMAP_readonly; } + map->pages_vm_start = vma->vm_start; + if (use_ptemod) { - map->vma = vma; err = mmu_interval_notifier_insert_locked( &map->notifier, vma->vm_mm, vma->vm_start, vma->vm_end - vma->vm_start, &gntdev_mmu_ops); - if (err) { - map->vma = NULL; + if (err) goto out_unlock_put; - } + + map->notifier_init = true; } mutex_unlock(&priv->lock); @@ -1069,7 +1067,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) */ mmu_interval_read_begin(&map->notifier); - map->pages_vm_start = vma->vm_start; err = apply_to_page_range(vma->vm_mm, vma->vm_start, vma->vm_end - vma->vm_start, find_grant_ptes, map); @@ -1098,13 +1095,8 @@ unlock_out: out_unlock_put: mutex_unlock(&priv->lock); out_put_map: - if (use_ptemod) { + if (use_ptemod) unmap_grant_pages(map, 0, map->count); - if (map->vma) { - mmu_interval_notifier_remove(&map->notifier); - map->vma = NULL; - } - } gntdev_put_map(priv, map); return err; } From 7fba4a389d070daf17e18c657c8e31f03cc2486b Mon Sep 17 00:00:00 2001 From: Prathamesh Shete Date: Thu, 6 Oct 2022 18:36:22 +0530 Subject: [PATCH 0450/1477] mmc: sdhci-tegra: Use actual clock rate for SW tuning correction [ Upstream commit b78870e7f41534cc719c295d1f8809aca93aeeab ] Ensure tegra_host member "curr_clk_rate" holds the actual clock rate instead of requested clock rate for proper use during tuning correction algorithm. Actual clk rate may not be the same as the requested clk frequency depending on the parent clock source set. Tuning correction algorithm depends on certain parameters which are sensitive to current clk rate. 
If the host clk is selected instead of the actual clock rate, tuning correction algorithm may end up applying invalid correction, which could result in errors Fixes: ea8fc5953e8b ("mmc: tegra: update hw tuning process") Signed-off-by: Aniruddha TVS Rao Signed-off-by: Prathamesh Shete Acked-by: Adrian Hunter Acked-by: Thierry Reding Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221006130622.22900-4-pshete@nvidia.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index d50b691f6c44..67211fc42d24 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -760,7 +760,7 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) */ host_clk = tegra_host->ddr_signaling ? clock * 2 : clock; clk_set_rate(pltfm_host->clk, host_clk); - tegra_host->curr_clk_rate = host_clk; + tegra_host->curr_clk_rate = clk_get_rate(pltfm_host->clk); if (tegra_host->ddr_signaling) host->max_clk = host_clk; else From 0e4c06ae7c546764eb4769ede647ef71a2fd363d Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 25 Nov 2020 19:44:15 +0800 Subject: [PATCH 0451/1477] riscv: Add machine name to kernel boot log and stack dump output [ Upstream commit 46ad48e8a28da7cc37a16c7e7fc632ecf906e4bf ] Add the machine name to kernel boot-up log, and install the machine name to stack dump for DT boot mode. 
Signed-off-by: Kefeng Wang Reviewed-by: Atish Patra Signed-off-by: Palmer Dabbelt Stable-dep-of: 10f6913c548b ("riscv: always honor the CONFIG_CMDLINE_FORCE when parsing dtb") Signed-off-by: Sasha Levin --- arch/riscv/kernel/setup.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 117f3212a8e4..5d17d3ce36fd 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -54,8 +54,15 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); static void __init parse_dtb(void) { /* Early scan of device tree from init memory */ - if (early_init_dt_scan(dtb_early_va)) + if (early_init_dt_scan(dtb_early_va)) { + const char *name = of_flat_dt_get_machine_name(); + + if (name) { + pr_info("Machine model: %s\n", name); + dump_stack_set_arch_desc("%s (DT)", name); + } return; + } pr_err("No DTB passed to the kernel\n"); #ifdef CONFIG_CMDLINE_FORCE From 86e995f964f6ebc1ff48b0e1649f03b2c8931088 Mon Sep 17 00:00:00 2001 From: Wenting Zhang Date: Fri, 8 Jul 2022 16:38:22 -0400 Subject: [PATCH 0452/1477] riscv: always honor the CONFIG_CMDLINE_FORCE when parsing dtb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 10f6913c548b32ecb73801a16b120e761c6957ea ] When CONFIG_CMDLINE_FORCE is enabled, cmdline provided by CONFIG_CMDLINE are always used. This allows CONFIG_CMDLINE to be used regardless of the result of device tree scanning. This especially fixes the case where a device tree without the chosen node is supplied to the kernel. In such cases, early_init_dt_scan would return true. But inside early_init_dt_scan_chosen, the cmdline won't be updated as there is no chosen node in the device tree. As a result, CONFIG_CMDLINE is not copied into boot_command_line even if CONFIG_CMDLINE_FORCE is enabled. This commit allows properly update boot_command_line in this situation. 
Fixes: 8fd6e05c7463 ("arch: riscv: support kernel command line forcing when no DTB passed") Signed-off-by: Wenting Zhang Reviewed-by: Björn Töpel Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/PSBPR04MB399135DFC54928AB958D0638B1829@PSBPR04MB3991.apcprd04.prod.outlook.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 5d17d3ce36fd..cc85858f7fe8 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -61,10 +61,10 @@ static void __init parse_dtb(void) pr_info("Machine model: %s\n", name); dump_stack_set_arch_desc("%s (DT)", name); } - return; + } else { + pr_err("No DTB passed to the kernel\n"); } - pr_err("No DTB passed to the kernel\n"); #ifdef CONFIG_CMDLINE_FORCE strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); pr_info("Forcing kernel command line to: %s\n", boot_command_line); From dea47fefa6aa87256c8a46137c742060940a4197 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Mar 2021 13:11:38 +0800 Subject: [PATCH 0453/1477] perf pmu: Validate raw event with sysfs exported format bits [ Upstream commit e40647762fb5881360874e08e03e972d58d63c42 ] A raw PMU event (eventsel+umask) in the form of rNNN is supported by perf but lacks checking for the validity of the raw encoding. For example, bit 16 and bit 17 are not valid on KBL but perf doesn't report a warning when encoding with these bits. Before: # ./perf stat -e cpu/r031234/ -a -- sleep 1 Performance counter stats for 'system wide': 0 cpu/r031234/ 1.003798924 seconds time elapsed It may silently measure the wrong event! The kernel supported bits have been exported through /sys/devices/<pmu>/format/. Perf collects the information to 'struct perf_pmu_format' and links it to 'pmu->format' list. The 'struct perf_pmu_format' has a bitmap which records the valid bits for this format.
For example, root@kbl-ppc:/sys/devices/cpu/format# cat umask config:8-15 The valid bits (bit8-bit15) are recorded in bitmap of format 'umask'. We collect total valid bits of all formats, save to a local variable 'masks' and reverse it. Now '~masks' represents total invalid bits. bits = config & ~masks; The set bits in 'bits' indicate the invalid bits used in config. Finally we use bitmap_scnprintf to report the invalid bits. Some architectures may not export supported bits through sysfs, so if masks is 0, perf_pmu__warn_invalid_config directly returns. After: Single event without name: # ./perf stat -e cpu/r031234/ -a -- sleep 1 WARNING: event 'N/A' not valid (bits 16-17 of config '31234' not supported by kernel)! Performance counter stats for 'system wide': 0 cpu/r031234/ 1.001597373 seconds time elapsed Multiple events with names: # ./perf stat -e cpu/rf01234,name=aaa/,cpu/r031234,name=bbb/ -a -- sleep 1 WARNING: event 'aaa' not valid (bits 20,22 of config 'f01234' not supported by kernel)! WARNING: event 'bbb' not valid (bits 16-17 of config '31234' not supported by kernel)! Performance counter stats for 'system wide': 0 aaa 0 bbb 1.001573787 seconds time elapsed Warnings are reported for invalid bits. 
Co-developed-by: Jiri Olsa Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210310051138.12154-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: e552b7be12ed ("perf: Skip and warn on unknown format 'configN' attrs") Signed-off-by: Sasha Levin --- tools/perf/util/parse-events.c | 3 +++ tools/perf/util/pmu.c | 33 +++++++++++++++++++++++++++++++++ tools/perf/util/pmu.h | 3 +++ 3 files changed, 39 insertions(+) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3a0a7930cd10..36969fc8f1fc 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -356,6 +356,9 @@ __add_event(struct list_head *list, int *idx, struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) : cpu_list ? perf_cpu_map__new(cpu_list) : NULL; + if (pmu && attr->type == PERF_TYPE_RAW) + perf_pmu__warn_invalid_config(pmu, attr->config, name); + if (init_attr) event_attr_init(attr); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d41caeb35cf6..349012f7defb 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1716,3 +1716,36 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) return nr_caps; } + +void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, + char *name) +{ + struct perf_pmu_format *format; + __u64 masks = 0, bits; + char buf[100]; + unsigned int i; + + list_for_each_entry(format, &pmu->format, list) { + if (format->value != PERF_PMU_FORMAT_VALUE_CONFIG) + continue; + + for_each_set_bit(i, format->bits, PERF_PMU_FORMAT_BITS) + masks |= 1ULL << i; + } + + /* + * Kernel doesn't export any valid format bits. 
+ */ + if (masks == 0) + return; + + bits = config & ~masks; + if (bits == 0) + return; + + bitmap_scnprintf((unsigned long *)&bits, sizeof(bits) * 8, buf, sizeof(buf)); + + pr_warning("WARNING: event '%s' not valid (bits %s of config " + "'%llx' not supported by kernel)!\n", + name ?: "N/A", buf, config); +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index a64e9c9ce731..d9aa8c958d21 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -120,4 +120,7 @@ int perf_pmu__convert_scale(const char *scale, char **end, double *sval); int perf_pmu__caps_parse(struct perf_pmu *pmu); +void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, + char *name); + #endif /* __PMU_H */ From ca4c49838278344854792bec2645b01e50471ccf Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 4 Oct 2022 14:12:35 -0500 Subject: [PATCH 0454/1477] perf: Skip and warn on unknown format 'configN' attrs [ Upstream commit e552b7be12ed62357df84392efa525ecb01910fb ] If the kernel exposes a new perf_event_attr field in a format attr, perf will return an error stating the specified PMU can't be found. For example, a format attr with 'config3:0-63' causes an error as config3 is unknown to perf. This causes a compatibility issue between a newer kernel with older perf tool. Before this change with a kernel adding 'config3' I get: $ perf record -e arm_spe// -- true event syntax error: 'arm_spe//' \___ Cannot find PMU `arm_spe'. Missing kernel support? Run 'perf list' for a list of valid events Usage: perf record [] [] or: perf record [] -- [] -e, --event event selector. use 'perf list' to list available events After this change, I get: $ perf record -e arm_spe// -- true WARNING: 'arm_spe_0' format 'inv_event_filter' requires 'perf_event_attr::config3' which is not supported by this version of perf! 
[ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.091 MB perf.data ] To support unknown configN formats, rework the YACC implementation to pass any config[0-9]+ format to perf_pmu__new_format() to handle with a warning. Reviewed-by: Namhyung Kim Signed-off-by: Rob Herring Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220914-arm-perf-tool-spe1-2-v2-v4-1-83c098e6212e@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/parse-events.c | 3 +++ tools/perf/util/pmu.c | 17 +++++++++++++++++ tools/perf/util/pmu.h | 2 ++ tools/perf/util/pmu.l | 2 -- tools/perf/util/pmu.y | 15 ++++----------- 5 files changed, 26 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 36969fc8f1fc..c56a4d9c3be9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -356,6 +356,9 @@ __add_event(struct list_head *list, int *idx, struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) : cpu_list ? 
perf_cpu_map__new(cpu_list) : NULL; + if (pmu) + perf_pmu__warn_invalid_formats(pmu); + if (pmu && attr->type == PERF_TYPE_RAW) perf_pmu__warn_invalid_config(pmu, attr->config, name); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 349012f7defb..ac45da0302a7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -862,6 +862,23 @@ static struct perf_pmu *pmu_lookup(const char *name) return pmu; } +void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) +{ + struct perf_pmu_format *format; + + /* fake pmu doesn't have format list */ + if (pmu == &perf_pmu__fake) + return; + + list_for_each_entry(format, &pmu->format, list) + if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) { + pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'" + "which is not supported by this version of perf!\n", + pmu->name, format->name, format->value); + return; + } +} + static struct perf_pmu *pmu_find(const char *name) { struct perf_pmu *pmu; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index d9aa8c958d21..7d208b850769 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -15,6 +15,7 @@ enum { PERF_PMU_FORMAT_VALUE_CONFIG, PERF_PMU_FORMAT_VALUE_CONFIG1, PERF_PMU_FORMAT_VALUE_CONFIG2, + PERF_PMU_FORMAT_VALUE_CONFIG_END, }; #define PERF_PMU_FORMAT_BITS 64 @@ -122,5 +123,6 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, char *name); +void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu); #endif /* __PMU_H */ diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l index a15d9fbd7c0e..58b4926cfaca 100644 --- a/tools/perf/util/pmu.l +++ b/tools/perf/util/pmu.l @@ -27,8 +27,6 @@ num_dec [0-9]+ {num_dec} { return value(10); } config { return PP_CONFIG; } -config1 { return PP_CONFIG1; } -config2 { return PP_CONFIG2; } - { return '-'; } : { return ':'; } , { return ','; } diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y index 
bfd7e8509869..283efe059819 100644 --- a/tools/perf/util/pmu.y +++ b/tools/perf/util/pmu.y @@ -20,7 +20,7 @@ do { \ %} -%token PP_CONFIG PP_CONFIG1 PP_CONFIG2 +%token PP_CONFIG %token PP_VALUE PP_ERROR %type PP_VALUE %type bit_term @@ -47,18 +47,11 @@ PP_CONFIG ':' bits $3)); } | -PP_CONFIG1 ':' bits +PP_CONFIG PP_VALUE ':' bits { ABORT_ON(perf_pmu__new_format(format, name, - PERF_PMU_FORMAT_VALUE_CONFIG1, - $3)); -} -| -PP_CONFIG2 ':' bits -{ - ABORT_ON(perf_pmu__new_format(format, name, - PERF_PMU_FORMAT_VALUE_CONFIG2, - $3)); + $2, + $4)); } bits: From b1efc196446ae0e331045ad0ae9149021bc1642f Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 3 Feb 2021 15:41:56 +0300 Subject: [PATCH 0455/1477] fcntl: make F_GETOWN(EX) return 0 on dead owner task [ Upstream commit cc4a3f885e8f2bc3c86a265972e94fef32d68f67 ] Currently there is no way to differentiate the file with alive owner from the file with dead owner but pid of the owner reused. That's why CRIU can't actually know if it needs to restore file owner or not, because if it restores owner but actual owner was dead, this can introduce unexpected signals to the "false"-owner (which reused the pid). Let's change the api, so that F_GETOWN(EX) returns 0 in case actual owner is dead already. This comports with the POSIX spec, which states that a PID of 0 indicates that no signal will be sent. Cc: Jeff Layton Cc: "J. 
Bruce Fields" Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Cyrill Gorcunov Cc: Andrei Vagin Signed-off-by: Pavel Tikhomirov Signed-off-by: Jeff Layton Stable-dep-of: f671a691e299 ("fcntl: fix potential deadlocks for &fown_struct.lock") Signed-off-by: Sasha Levin --- fs/fcntl.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index 71b43538fa44..5a56351f1fc3 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -148,11 +148,15 @@ void f_delown(struct file *filp) pid_t f_getown(struct file *filp) { - pid_t pid; + pid_t pid = 0; read_lock(&filp->f_owner.lock); - pid = pid_vnr(filp->f_owner.pid); - if (filp->f_owner.pid_type == PIDTYPE_PGID) - pid = -pid; + rcu_read_lock(); + if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) { + pid = pid_vnr(filp->f_owner.pid); + if (filp->f_owner.pid_type == PIDTYPE_PGID) + pid = -pid; + } + rcu_read_unlock(); read_unlock(&filp->f_owner.lock); return pid; } @@ -200,11 +204,14 @@ static int f_setown_ex(struct file *filp, unsigned long arg) static int f_getown_ex(struct file *filp, unsigned long arg) { struct f_owner_ex __user *owner_p = (void __user *)arg; - struct f_owner_ex owner; + struct f_owner_ex owner = {}; int ret = 0; read_lock(&filp->f_owner.lock); - owner.pid = pid_vnr(filp->f_owner.pid); + rcu_read_lock(); + if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) + owner.pid = pid_vnr(filp->f_owner.pid); + rcu_read_unlock(); switch (filp->f_owner.pid_type) { case PIDTYPE_PID: owner.type = F_OWNER_TID; From f687e2111b6f1745bb32b7575224fe564d45b8b8 Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Fri, 2 Jul 2021 17:18:30 +0800 Subject: [PATCH 0456/1477] fcntl: fix potential deadlocks for &fown_struct.lock [ Upstream commit f671a691e299f58835d4660d642582bf0e8f6fda ] Syzbot reports a potential deadlock in do_fcntl: ======================================================== WARNING: possible irq lock inversion 
dependency detected 5.12.0-syzkaller #0 Not tainted -------------------------------------------------------- syz-executor132/8391 just changed the state of lock: ffff888015967bf8 (&f->f_owner.lock){.+..}-{2:2}, at: f_getown_ex fs/fcntl.c:211 [inline] ffff888015967bf8 (&f->f_owner.lock){.+..}-{2:2}, at: do_fcntl+0x8b4/0x1200 fs/fcntl.c:395 but this lock was taken by another, HARDIRQ-safe lock in the past: (&dev->event_lock){-...}-{2:2} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Chain exists of: &dev->event_lock --> &new->fa_lock --> &f->f_owner.lock Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&f->f_owner.lock); local_irq_disable(); lock(&dev->event_lock); lock(&new->fa_lock); lock(&dev->event_lock); *** DEADLOCK *** This happens because there is a lock hierarchy of &dev->event_lock --> &new->fa_lock --> &f->f_owner.lock from the following call chain: input_inject_event(): spin_lock_irqsave(&dev->event_lock,...); input_handle_event(): input_pass_values(): input_to_handler(): evdev_events(): evdev_pass_values(): spin_lock(&client->buffer_lock); __pass_event(): kill_fasync(): kill_fasync_rcu(): read_lock(&fa->fa_lock); send_sigio(): read_lock_irqsave(&fown->lock,...); However, since &dev->event_lock is HARDIRQ-safe, interrupts have to be disabled while grabbing &f->f_owner.lock, otherwise we invert the lock hierarchy. Hence, we replace calls to read_lock/read_unlock on &f->f_owner.lock, with read_lock_irq/read_unlock_irq. 
Reported-and-tested-by: syzbot+e6d5398a02c516ce5e70@syzkaller.appspotmail.com Signed-off-by: Desmond Cheong Zhi Xi Signed-off-by: Jeff Layton Signed-off-by: Sasha Levin --- fs/fcntl.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index 5a56351f1fc3..fcf34f83bf6a 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -149,7 +149,8 @@ void f_delown(struct file *filp) pid_t f_getown(struct file *filp) { pid_t pid = 0; - read_lock(&filp->f_owner.lock); + + read_lock_irq(&filp->f_owner.lock); rcu_read_lock(); if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) { pid = pid_vnr(filp->f_owner.pid); @@ -157,7 +158,7 @@ pid_t f_getown(struct file *filp) pid = -pid; } rcu_read_unlock(); - read_unlock(&filp->f_owner.lock); + read_unlock_irq(&filp->f_owner.lock); return pid; } @@ -207,7 +208,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) struct f_owner_ex owner = {}; int ret = 0; - read_lock(&filp->f_owner.lock); + read_lock_irq(&filp->f_owner.lock); rcu_read_lock(); if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) owner.pid = pid_vnr(filp->f_owner.pid); @@ -230,7 +231,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) ret = -EINVAL; break; } - read_unlock(&filp->f_owner.lock); + read_unlock_irq(&filp->f_owner.lock); if (!ret) { ret = copy_to_user(owner_p, &owner, sizeof(owner)); @@ -248,10 +249,10 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) uid_t src[2]; int err; - read_lock(&filp->f_owner.lock); + read_lock_irq(&filp->f_owner.lock); src[0] = from_kuid(user_ns, filp->f_owner.uid); src[1] = from_kuid(user_ns, filp->f_owner.euid); - read_unlock(&filp->f_owner.lock); + read_unlock_irq(&filp->f_owner.lock); err = put_user(src[0], &dst[0]); err |= put_user(src[1], &dst[1]); From cb1024d8a4d09198985adb252905b2f4039d79d5 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Thu, 15 Oct 2020 23:57:56 +0530 Subject: [PATCH 0457/1477] arm64: dts: qcom: sc7180-trogdor: Fixup 
modem memory region commit ef9a5d188d663753e73a3c8e8910ceab8e9305c4 upstream. The modem firmware memory requirements vary between 32M/140M on no-lte/lte skus respectively, so fixup the modem memory region to reflect the requirements. Reviewed-by: Evan Green Signed-off-by: Sibi Sankar Link: https://lore.kernel.org/r/1602786476-27833-1-git-send-email-sibis@codeaurora.org Signed-off-by: Bjorn Andersson Acked-by: Alex Elder Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sc7180-trogdor-lte-sku.dtsi | 4 ++++ arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor-lte-sku.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor-lte-sku.dtsi index 44956e3165a1..469aad4e5948 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor-lte-sku.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor-lte-sku.dtsi @@ -9,6 +9,10 @@ label = "proximity-wifi-lte"; }; +&mpss_mem { + reg = <0x0 0x86000000 0x0 0x8c00000>; +}; + &remoteproc_mpss { firmware-name = "qcom/sc7180-trogdor/modem/mba.mbn", "qcom/sc7180-trogdor/modem/qdsp6sw.mbn"; diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index 5b2a616c6257..cb2c47f13a8a 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -39,7 +39,7 @@ }; mpss_mem: memory@86000000 { - reg = <0x0 0x86000000 0x0 0x8c00000>; + reg = <0x0 0x86000000 0x0 0x2000000>; no-map; }; From a6e770733dc4ce616ff77f43f50507fa2034ff49 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 19 Oct 2022 13:53:02 +0100 Subject: [PATCH 0458/1477] arm64: topology: move store_cpu_topology() to shared code commit 456797da792fa7cbf6698febf275fe9b36691f78 upstream. arm64's method of defining a default cpu topology requires only minimal changes to apply to RISC-V also. 
The current arm64 implementation exits early in a uniprocessor configuration by reading MPIDR & claiming that uniprocessor can rely on the default values. This appears to be a hangover from prior to '3102bc0e6ac7 ("arm64: topology: Stop using MPIDR for topology information")', because the current code just assigns default values for multiprocessor systems. With the MPIDR references removed, store_cpu_topology() can be moved to the common arch_topology code. Reviewed-by: Sudeep Holla Acked-by: Catalin Marinas Reviewed-by: Atish Patra Signed-off-by: Conor Dooley Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/topology.c | 40 ------------------------------------ drivers/base/arch_topology.c | 19 +++++++++++++++++ 2 files changed, 19 insertions(+), 40 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 4358bc319306..f35af19b7055 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -22,46 +22,6 @@ #include #include -void store_cpu_topology(unsigned int cpuid) -{ - struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; - u64 mpidr; - - if (cpuid_topo->package_id != -1) - goto topology_populated; - - mpidr = read_cpuid_mpidr(); - - /* Uniprocessor systems can rely on default topology values */ - if (mpidr & MPIDR_UP_BITMASK) - return; - - /* - * This would be the place to create cpu topology based on MPIDR. - * - * However, it cannot be trusted to depict the actual topology; some - * pieces of the architecture enforce an artificial cap on Aff0 values - * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an - * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up - * having absolutely no relationship to the actual underlying system - * topology, and cannot be reasonably used as core / package ID. - * - * If the MT bit is set, Aff0 *could* be used to define a thread ID, but - * we still wouldn't be able to obtain a sane core ID.
This means we - * need to entirely ignore MPIDR for any topology deduction. - */ - cpuid_topo->thread_id = -1; - cpuid_topo->core_id = cpuid; - cpuid_topo->package_id = cpu_to_node(cpuid); - - pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", - cpuid, cpuid_topo->package_id, cpuid_topo->core_id, - cpuid_topo->thread_id, mpidr); - -topology_populated: - update_siblings_masks(cpuid); -} - #ifdef CONFIG_ACPI static bool __init acpi_cpu_is_threaded(int cpu) { diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 8272a3a002a3..51647926e605 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -596,4 +596,23 @@ void __init init_cpu_topology(void) else if (of_have_populated_dt() && parse_dt_topology()) reset_cpu_topology(); } + +void store_cpu_topology(unsigned int cpuid) +{ + struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; + + if (cpuid_topo->package_id != -1) + goto topology_populated; + + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = cpuid; + cpuid_topo->package_id = cpu_to_node(cpuid); + + pr_debug("CPU%u: package %d core %d thread %d\n", + cpuid, cpuid_topo->package_id, cpuid_topo->core_id, + cpuid_topo->thread_id); + +topology_populated: + update_siblings_masks(cpuid); +} #endif From 79c3482fbe20e39630b0f95e1ffeb846aa945c41 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 19 Oct 2022 13:53:03 +0100 Subject: [PATCH 0459/1477] riscv: topology: fix default topology reporting commit fbd92809997a391f28075f1c8b5ee314c225557c upstream. RISC-V has no sane defaults to fall back on where there is no cpu-map in the devicetree. Without sane defaults, the package, core and thread IDs are all set to -1. This causes user-visible inaccuracies for tools like hwloc/lstopo which rely on the sysfs cpu topology files to detect a system's topology. 
On a PolarFire SoC, which should have 4 harts with a thread each, lstopo currently reports: Machine (793MB total) Package L#0 NUMANode L#0 (P#0 793MB) Core L#0 L1d L#0 (32KB) + L1i L#0 (32KB) + PU L#0 (P#0) L1d L#1 (32KB) + L1i L#1 (32KB) + PU L#1 (P#1) L1d L#2 (32KB) + L1i L#2 (32KB) + PU L#2 (P#2) L1d L#3 (32KB) + L1i L#3 (32KB) + PU L#3 (P#3) Adding calls to store_cpu_topology() in {boot,smp} hart bringup code results in the correct topology being reported: Machine (793MB total) Package L#0 NUMANode L#0 (P#0 793MB) L1d L#0 (32KB) + L1i L#0 (32KB) + Core L#0 + PU L#0 (P#0) L1d L#1 (32KB) + L1i L#1 (32KB) + Core L#1 + PU L#1 (P#1) L1d L#2 (32KB) + L1i L#2 (32KB) + Core L#2 + PU L#2 (P#2) L1d L#3 (32KB) + L1i L#3 (32KB) + Core L#3 + PU L#3 (P#3) CC: stable@vger.kernel.org # 456797da792f: arm64: topology: move store_cpu_topology() to shared code Fixes: 03f11f03dbfe ("RISC-V: Parse cpu topology during boot.") Reported-by: Brice Goglin Link: https://github.com/open-mpi/hwloc/issues/536 Reviewed-by: Sudeep Holla Reviewed-by: Atish Patra Signed-off-by: Conor Dooley Signed-off-by: Greg Kroah-Hartman --- arch/riscv/Kconfig | 2 +- arch/riscv/kernel/smpboot.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1b894c327578..557c4a8c4087 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -35,7 +35,7 @@ config RISCV select CLINT_TIMER if !MMU select COMMON_CLK select EDAC_SUPPORT - select GENERIC_ARCH_TOPOLOGY if SMP + select GENERIC_ARCH_TOPOLOGY select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS select GENERIC_EARLY_IOREMAP diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 0b04e0eae3ab..0e0aed380e28 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -46,6 +46,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int cpuid; int ret; + store_cpu_topology(smp_processor_id()); + /* This covers non-smp usecase mandated by "nosmp" option */
if (max_cpus == 0) return; @@ -152,8 +154,8 @@ asmlinkage __visible void smp_callin(void) mmgrab(mm); current->active_mm = mm; + store_cpu_topology(curr_cpuid); notify_cpu_starting(curr_cpuid); - update_siblings_masks(curr_cpuid); set_cpu_online(curr_cpuid, 1); /* From da54c5f4b5b547bab4c38f3356d0f779d9ad2098 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 31 Jan 2022 09:24:49 +0200 Subject: [PATCH 0460/1477] perf/x86/intel/pt: Relax address filter validation commit c243cecb58e3905baeace8827201c14df8481e2a upstream. The requirement for 64-bit address filters is that they are canonical addresses. In other respects any address range is allowed which would include user space addresses. That can be useful for tracing virtual machine guests because address filtering can be used to advantage in place of current privilege level (CPL) filtering. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220131072453.2839535-2-adrian.hunter@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/pt.c | 63 ++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index cc3b79c06685..95234f46b0fb 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -13,6 +13,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include +#include #include #include @@ -1348,11 +1350,37 @@ static void pt_addr_filters_fini(struct perf_event *event) event->hw.addr_filters = NULL; } -static inline bool valid_kernel_ip(unsigned long ip) +#ifdef CONFIG_X86_64 +static u64 canonical_address(u64 vaddr, u8 vaddr_bits) { - return virt_addr_valid(ip) && kernel_ip(ip); + return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); } +static u64 is_canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return canonical_address(vaddr, vaddr_bits) == vaddr; +} + +/* Clamp to a canonical address greater-than-or-equal-to the 
address given */ +static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits) +{ + return is_canonical_address(vaddr, vaddr_bits) ? + vaddr : + -BIT_ULL(vaddr_bits - 1); +} + +/* Clamp to a canonical address less-than-or-equal-to the address given */ +static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits) +{ + return is_canonical_address(vaddr, vaddr_bits) ? + vaddr : + BIT_ULL(vaddr_bits - 1) - 1; +} +#else +#define clamp_to_ge_canonical_addr(x, y) (x) +#define clamp_to_le_canonical_addr(x, y) (x) +#endif + static int pt_event_addr_filters_validate(struct list_head *filters) { struct perf_addr_filter *filter; @@ -1367,14 +1395,6 @@ static int pt_event_addr_filters_validate(struct list_head *filters) filter->action == PERF_ADDR_FILTER_ACTION_START) return -EOPNOTSUPP; - if (!filter->path.dentry) { - if (!valid_kernel_ip(filter->offset)) - return -EINVAL; - - if (!valid_kernel_ip(filter->offset + filter->size)) - return -EINVAL; - } - if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges)) return -EOPNOTSUPP; } @@ -1398,9 +1418,26 @@ static void pt_event_addr_filters_sync(struct perf_event *event) if (filter->path.dentry && !fr[range].start) { msr_a = msr_b = 0; } else { - /* apply the offset */ - msr_a = fr[range].start; - msr_b = msr_a + fr[range].size - 1; + unsigned long n = fr[range].size - 1; + unsigned long a = fr[range].start; + unsigned long b; + + if (a > ULONG_MAX - n) + b = ULONG_MAX; + else + b = a + n; + /* + * Apply the offset. 64-bit addresses written to the + * MSRs must be canonical, but the range can encompass + * non-canonical addresses. Since software cannot + * execute at non-canonical addresses, adjusting to + * canonical addresses does not affect the result of the + * address filter. 
+ */ + msr_a = clamp_to_ge_canonical_addr(a, boot_cpu_data.x86_virt_bits); + msr_b = clamp_to_le_canonical_addr(b, boot_cpu_data.x86_virt_bits); + if (msr_b < msr_a) + msr_a = msr_b = 0; } filters->filter[range].msr_a = msr_a; From dcaf6313202a02cee13302a04a9b0503352ddef2 Mon Sep 17 00:00:00 2001 From: Gaurav Kohli Date: Wed, 5 Oct 2022 22:52:59 -0700 Subject: [PATCH 0461/1477] hv_netvsc: Fix race between VF offering and VF association message from host commit 365e1ececb2905f94cc10a5817c5b644a32a3ae2 upstream. During vm boot, there might be possibility that vf registration call comes before the vf association from host to vm. And this might break netvsc vf path, To prevent the same block vf registration until vf bind message comes from host. Cc: stable@vger.kernel.org Fixes: 00d7ddba11436 ("hv_netvsc: pair VF based on serial number") Reviewed-by: Haiyang Zhang Signed-off-by: Gaurav Kohli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/hyperv_net.h | 3 ++- drivers/net/hyperv/netvsc.c | 4 ++++ drivers/net/hyperv/netvsc_drv.c | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index a0f338cf1424..367878493e70 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -977,7 +977,8 @@ struct net_device_context { u32 vf_alloc; /* Serial number of the VF to team with */ u32 vf_serial; - + /* completion variable to confirm vf association */ + struct completion vf_add; /* Is the current data path through the VF NIC? 
*/ bool data_path_is_vf; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 6a7ab930ef70..d15da8287df3 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1327,6 +1327,10 @@ static void netvsc_send_vf(struct net_device *ndev, net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; + + if (net_device_ctx->vf_alloc) + complete(&net_device_ctx->vf_add); + netdev_info(ndev, "VF slot %u %s\n", net_device_ctx->vf_serial, net_device_ctx->vf_alloc ? "added" : "removed"); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 18484370da0d..f2020be43cfe 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2290,6 +2290,7 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) { struct device *parent = vf_netdev->dev.parent; struct net_device_context *ndev_ctx; + struct net_device *ndev; struct pci_dev *pdev; u32 serial; @@ -2316,6 +2317,18 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) return hv_get_drvdata(ndev_ctx->device_ctx); } + /* Fallback path to check synthetic vf with + * help of mac addr + */ + list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) { + ndev = hv_get_drvdata(ndev_ctx->device_ctx); + if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) { + netdev_notice(vf_netdev, + "falling back to mac addr based matching\n"); + return ndev; + } + } + netdev_notice(vf_netdev, "no netdev found for vf serial:%u\n", serial); return NULL; @@ -2406,6 +2419,11 @@ static int netvsc_vf_changed(struct net_device *vf_netdev) return NOTIFY_OK; net_device_ctx->data_path_is_vf = vf_is_up; + if (vf_is_up && !net_device_ctx->vf_alloc) { + netdev_info(ndev, "Waiting for the VF association from host\n"); + wait_for_completion(&net_device_ctx->vf_add); + } + netvsc_switch_datapath(ndev, vf_is_up); netdev_info(ndev, "Data path 
switched %s VF: %s\n", vf_is_up ? "to" : "from", vf_netdev->name); @@ -2429,6 +2447,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netvsc_vf_setxdp(vf_netdev, NULL); + reinit_completion(&net_device_ctx->vf_add); netdev_rx_handler_unregister(vf_netdev); netdev_upper_dev_unlink(vf_netdev, ndev); RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); @@ -2466,6 +2485,7 @@ static int netvsc_probe(struct hv_device *dev, INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); + init_completion(&net_device_ctx->vf_add); spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); From 67dafece56b6e78ccf956064b50f08c80dd6134e Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Wed, 26 Oct 2022 17:22:46 +0200 Subject: [PATCH 0462/1477] ACPI: video: Force backlight native for more TongFang devices commit 3dbc80a3e4c55c4a5b89ef207bed7b7de36157b4 upstream. This commit is very different from the upstream commit! It fixes the same issue by adding more quirks, rather then the general fix from the 6.1 kernel, because the general fix from the 6.1 kernel is part of a larger refactoring of the backlight code which is not suitable for the stable series. As described in "ACPI: video: Drop NL5x?U, PF4NU1F and PF5?U?? acpi_backlight=native quirks" (10212754a0d2) the upstream commit "ACPI: video: Make backlight class device registration a separate step (v2)" (3dbc80a3e4c5) makes these quirks unnecessary. However as mentioned in this bugtracker ticket https://bugzilla.kernel.org/show_bug.cgi?id=215683#c17 the upstream fix is part of a larger patchset that is overall too complex for stable. The TongFang GKxNRxx, GMxNGxx, GMxZGxx, and GMxRGxx / TUXEDO Stellaris/Polaris Gen 1-4, have the same problem as the Clevo NL5xRU and NL5xNU / TUXEDO Aura 15 Gen1 and Gen2: They have a working native and video interface for screen backlight. 
However the default detection mechanism first registers the video interface before unregistering it again and switching to the native interface during boot. This results in a dangling SBIOS request for backlight change for some reason, causing the backlight to switch to ~2% once per boot on the first power cord connect or disconnect event. Setting the native interface explicitly circumvents this buggy behaviour by avoiding the unregistering process. Reviewed-by: Hans de Goede Signed-off-by: Werner Sembach Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index e39d59ad6496..b13713199ad9 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -500,6 +500,70 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "PF5LUXG"), }, }, + /* + * More Tongfang devices with the same issue as the Clevo NL5xRU and + * NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description above. 
+ */ + { + .callback = video_detect_force_native, + .ident = "TongFang GKxNRxx", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GKxNRxx"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang GKxNRxx", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "POLARIS1501A1650TI"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang GKxNRxx", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "POLARIS1501A2060"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang GKxNRxx", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "POLARIS1701A1650TI"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang GKxNRxx", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "POLARIS1701A2060"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang GMxNGxx", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GMxNGxx"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang GMxZGxx", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GMxZGxx"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang GMxRGxx", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"), + }, + }, /* * Desktops which falsely report a backlight and which our heuristics * for this do not catch. From 6ab2287b26f133891aaaa8ac0623266775d51b0c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 29 Sep 2022 08:20:10 -0700 Subject: [PATCH 0463/1477] x86/Kconfig: Drop check for -mabi=ms for CONFIG_EFI_STUB commit 33806e7cb8d50379f55c3e8f335e91e1b359dc7b upstream. A recent change in LLVM made CONFIG_EFI_STUB unselectable because it no longer pretends to support -mabi=ms, breaking the dependency in Kconfig. Lack of CONFIG_EFI_STUB can prevent kernels from booting via EFI in certain circumstances. 
This check was added by 8f24f8c2fc82 ("efi/libstub: Annotate firmware routines as __efiapi") to ensure that __attribute__((ms_abi)) was available, as -mabi=ms is not actually used in any cflags. According to the GCC documentation, this attribute has been supported since GCC 4.4.7. The kernel currently requires GCC 5.1 so this check is not necessary; even when that change landed in 5.6, the kernel required GCC 4.9 so it was unnecessary then as well. Clang supports __attribute__((ms_abi)) for all versions that are supported for building the kernel so no additional check is needed. Remove the 'depends on' line altogether to allow CONFIG_EFI_STUB to be selected when CONFIG_EFI is enabled, regardless of compiler. Fixes: 8f24f8c2fc82 ("efi/libstub: Annotate firmware routines as __efiapi") Signed-off-by: Nathan Chancellor Signed-off-by: Borislav Petkov Reviewed-by: Nick Desaulniers Acked-by: Ard Biesheuvel Cc: stable@vger.kernel.org Link: https://github.com/llvm/llvm-project/commit/d1ad006a8f64bdc17f618deffa9e7c91d82c444d [nathan: Fix conflict due to lack of c6dbd3e5e69c in older trees] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 159646da3c6b..d64e69013995 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1945,7 +1945,6 @@ config EFI config EFI_STUB bool "EFI stub support" depends on EFI && !X86_USE_3DNOW - depends on $(cc-option,-mabi=ms) || X86_32 select RELOCATABLE help This kernel feature allows a bzImage to be loaded directly From 3a260e9844c97c220603696b0bc225a8acf44d9c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 24 Oct 2022 13:34:14 -0700 Subject: [PATCH 0464/1477] Makefile.debug: re-enable debug info for .S files This is _not_ an upstream commit and just for 5.10.y only. It is based on commit 32ef9e5054ec0321b9336058c58ec749e9c6b0fe upstream. 
Alexey reported that the fraction of unknown filename instances in kallsyms grew from ~0.3% to ~10% recently; Bill and Greg tracked it down to assembler defined symbols, which regressed as a result of: commit b8a9092330da ("Kbuild: do not emit debug info for assembly with LLVM_IAS=1") In that commit, I allude to restoring debug info for assembler defined symbols in a follow up patch, but it seems I forgot to do so in commit a66049e2cf0e ("Kbuild: make DWARF version a choice") Fixes: b8a9092330da ("Kbuild: do not emit debug info for assembly with LLVM_IAS=1") Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0e22d4c8bc79..8d63b1bff690 100644 --- a/Makefile +++ b/Makefile @@ -842,7 +842,9 @@ else DEBUG_CFLAGS += -g endif -ifneq ($(LLVM_IAS),1) +ifeq ($(LLVM_IAS),1) +KBUILD_AFLAGS += -g +else KBUILD_AFLAGS += -Wa,-gdwarf-2 endif From e2f9b62ead9a69f5f605bb8777be5c97f292972d Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Wed, 28 Sep 2022 12:57:44 +0300 Subject: [PATCH 0465/1477] mmc: core: Add SD card quirk for broken discard commit 07d2872bf4c864eb83d034263c155746a2fb7a3b upstream. Some SD-cards from Sandisk that are SDA-6.0 compliant reports they supports discard, while they actually don't. This might cause mk2fs to fail while trying to format the card and revert it to a read-only mode. To fix this problem, let's add a card quirk (MMC_QUIRK_BROKEN_SD_DISCARD) to indicate that we shall fall-back to use the legacy erase command instead. 
Signed-off-by: Avri Altman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220928095744.16455-1-avri.altman@wdc.com [Ulf: Updated the commit message] Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 7 ++++++- drivers/mmc/core/card.h | 6 ++++++ drivers/mmc/core/quirks.h | 6 ++++++ include/linux/mmc/card.h | 1 + 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 66a00b7c751f..6622e3262187 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1069,6 +1069,11 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) nr = blk_rq_sectors(req); do { + unsigned int erase_arg = card->erase_arg; + + if (mmc_card_broken_sd_discard(card)) + erase_arg = SD_ERASE_ARG; + err = 0; if (card->quirks & MMC_QUIRK_INAND_CMD38) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, @@ -1079,7 +1084,7 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) card->ext_csd.generic_cmd6_time); } if (!err) - err = mmc_erase(card, from, nr, card->erase_arg); + err = mmc_erase(card, from, nr, erase_arg); } while (err == -EIO && !mmc_blk_reset(md, card->host, type)); if (err) status = BLK_STS_IOERR; diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index 7bd392d55cfa..5c6986131faf 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -70,6 +70,7 @@ struct mmc_fixup { #define EXT_CSD_REV_ANY (-1u) #define CID_MANFID_SANDISK 0x2 +#define CID_MANFID_SANDISK_SD 0x3 #define CID_MANFID_ATP 0x9 #define CID_MANFID_TOSHIBA 0x11 #define CID_MANFID_MICRON 0x13 @@ -222,4 +223,9 @@ static inline int mmc_card_broken_hpi(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BROKEN_HPI; } +static inline int mmc_card_broken_sd_discard(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_BROKEN_SD_DISCARD; +} + #endif diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 
d68e6e513a4f..c8c0f50a2076 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -99,6 +99,12 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_FIXUP("V10016", CID_MANFID_KINGSTON, CID_OEMID_ANY, add_quirk_mmc, MMC_QUIRK_TRIM_BROKEN), + /* + * Some SD cards reports discard support while they don't + */ + MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd, + MMC_QUIRK_BROKEN_SD_DISCARD), + END_FIXUP }; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 42df06c6b19c..ef870d1f4f5f 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -270,6 +270,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_IRQ_POLLING (1<<11) /* Polling SDIO_CCCR_INTx could create a fake interrupt */ #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */ #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ +#define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ bool reenable_cmdq; /* Re-enable Command Queue */ From 31b1570677e8bf85f48be8eb95e21804399b8295 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sun, 9 Oct 2022 18:10:38 +0800 Subject: [PATCH 0466/1477] blk-wbt: fix that 'rwb->wc' is always set to 1 in wbt_init() commit 285febabac4a16655372d23ff43e89ff6f216691 upstream. commit 8c5035dfbb94 ("blk-wbt: call rq_qos_add() after wb_normal is initialized") moves wbt_set_write_cache() before rq_qos_add(), which is wrong because wbt_rq_qos() is still NULL. Fix the problem by removing wbt_set_write_cache() and setting 'rwb->wc' directly. Note that this patch also removes the redundant setting of 'rwb->wc'.
Fixes: 8c5035dfbb94 ("blk-wbt: call rq_qos_add() after wb_normal is initialized") Reported-by: kernel test robot Link: https://lore.kernel.org/r/202210081045.77ddf59b-yujie.liu@intel.com Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20221009101038.1692875-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-wbt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index bafdb8098893..6f63920f073c 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -838,12 +838,11 @@ int wbt_init(struct request_queue *q) rwb->last_comp = rwb->last_issue = jiffies; rwb->win_nsec = RWB_WINDOW_NSEC; rwb->enable_state = WBT_STATE_ON_DEFAULT; - rwb->wc = 1; + rwb->wc = test_bit(QUEUE_FLAG_WC, &q->queue_flags); rwb->rq_depth.default_depth = RWB_DEF_DEPTH; rwb->min_lat_nsec = wbt_default_latency_nsec(q); wbt_queue_depth_changed(&rwb->rqos); - wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); /* * Assign rwb and add the stats callback. From a50ed2d28727ff605d95fb9a53be8ff94e8eaaf4 Mon Sep 17 00:00:00 2001 From: Seth Jenkins Date: Thu, 27 Oct 2022 11:36:52 -0400 Subject: [PATCH 0467/1477] mm: /proc/pid/smaps_rollup: fix no vma's null-deref Commit 258f669e7e88 ("mm: /proc/pid/smaps_rollup: convert to single value seq_file") introduced a null-deref if there are no vma's in the task in show_smaps_rollup. 
Fixes: 258f669e7e88 ("mm: /proc/pid/smaps_rollup: convert to single value seq_file") Signed-off-by: Seth Jenkins Reviewed-by: Alexey Dobriyan Tested-by: Alexey Dobriyan Signed-off-by: Greg Kroah-Hartman --- fs/proc/task_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ef18f0d71b11..8b75a04836b6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -951,7 +951,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v) vma = vma->vm_next; } - show_vma_header_prefix(m, priv->mm->mmap->vm_start, + show_vma_header_prefix(m, priv->mm->mmap ? priv->mm->mmap->vm_start : 0, last_vma_end, 0, 0, 0, 0); seq_pad(m, ' '); seq_puts(m, "[rollup]\n"); From 43d5109296fab30b7467d7d399bb51f1bb27eff4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 14 Oct 2022 11:26:25 -0700 Subject: [PATCH 0468/1477] udp: Update reuse->has_conns under reuseport_lock. commit 69421bf98482d089e50799f45e48b25ce4a8d154 upstream. When we call connect() for a UDP socket in a reuseport group, we have to update sk->sk_reuseport_cb->has_conns to 1. Otherwise, the kernel could select an unconnected socket wrongly for packets sent to the connected socket. However, the current way to set has_conns is illegal and possible to trigger that problem. reuseport_has_conns() changes has_conns under rcu_read_lock(), which upgrades the RCU reader to the updater. Then, it must do the update under the updater's lock, reuseport_lock, but it doesn't for now. For this reason, there is a race below where we fail to set has_conns resulting in the wrong socket selection. To avoid the race, let's split the reader and updater with proper locking. cpu1 cpu2 +----+ +----+ __ip[46]_datagram_connect() reuseport_grow() . . |- reuseport_has_conns(sk, true) |- more_reuse = __reuseport_alloc(more_socks_size) | .
| | |- rcu_read_lock() | |- reuse = rcu_dereference(sk->sk_reuseport_cb) | | | | | /* reuse->has_conns == 0 here */ | | |- more_reuse->has_conns = reuse->has_conns | |- reuse->has_conns = 1 | /* more_reuse->has_conns SHOULD BE 1 HERE */ | | | | | |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, | | | more_reuse) | `- rcu_read_unlock() `- kfree_rcu(reuse, rcu) | |- sk->sk_state = TCP_ESTABLISHED Note the likely(reuse) in reuseport_has_conns_set() is always true, but we put the test there for ease of review. [0] For the record, usually, sk_reuseport_cb is changed under lock_sock(). The only exception is reuseport_grow() & TCP reqsk migration case. 1) shutdown() TCP listener, which is moved into the latter part of reuse->socks[] to migrate reqsk. 2) New listen() overflows reuse->socks[] and call reuseport_grow(). 3) reuse->max_socks overflows u16 with the new listener. 4) reuseport_grow() pops the old shutdown()ed listener from the array and update its sk->sk_reuseport_cb as NULL without lock_sock(). shutdown()ed TCP sk->sk_reuseport_cb can be changed without lock_sock(), but, reuseport_has_conns_set() is called only for UDP under lock_sock(), so likely(reuse) never be false in reuseport_has_conns_set(). 
[0]: https://lore.kernel.org/netdev/CANn89iLja=eQHbsM_Ta2sQF0tOGU8vAGrh_izRuuHjuO1ouUag@mail.gmail.com/ Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20221014182625.89913-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- include/net/sock_reuseport.h | 11 +++++------ net/core/sock_reuseport.c | 16 ++++++++++++++++ net/ipv4/datagram.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/datagram.c | 2 +- net/ipv6/udp.c | 2 +- 6 files changed, 25 insertions(+), 10 deletions(-) diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index 505f1e18e9bf..3eac185ae2e8 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -38,21 +38,20 @@ extern struct sock *reuseport_select_sock(struct sock *sk, extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); extern int reuseport_detach_prog(struct sock *sk); -static inline bool reuseport_has_conns(struct sock *sk, bool set) +static inline bool reuseport_has_conns(struct sock *sk) { struct sock_reuseport *reuse; bool ret = false; rcu_read_lock(); reuse = rcu_dereference(sk->sk_reuseport_cb); - if (reuse) { - if (set) - reuse->has_conns = 1; - ret = reuse->has_conns; - } + if (reuse && reuse->has_conns) + ret = true; rcu_read_unlock(); return ret; } +void reuseport_has_conns_set(struct sock *sk); + #endif /* _SOCK_REUSEPORT_H */ diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index b065f0a103ed..49f9c2c4ffd5 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -18,6 +18,22 @@ DEFINE_SPINLOCK(reuseport_lock); static DEFINE_IDA(reuseport_ida); +void reuseport_has_conns_set(struct sock *sk) +{ + struct sock_reuseport *reuse; + + if (!rcu_access_pointer(sk->sk_reuseport_cb)) + return; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + if 
(likely(reuse)) + reuse->has_conns = 1; + spin_unlock_bh(&reuseport_lock); +} +EXPORT_SYMBOL(reuseport_has_conns_set); + static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks) { unsigned int size = sizeof(struct sock_reuseport) + diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 4a8550c49202..112c6e892d30 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len } inet->inet_daddr = fl4->daddr; inet->inet_dport = usin->sin_port; - reuseport_has_conns(sk, true); + reuseport_has_conns_set(sk); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); inet->inet_id = prandom_u32(); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4446aa8237ff..b093daaa3deb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -446,7 +446,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); /* Fall back to scoring if group has connections */ - if (result && !reuseport_has_conns(sk, false)) + if (result && !reuseport_has_conns(sk)) return result; result = result ? : sk; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 206f66310a88..f4559e5bc84b 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -256,7 +256,7 @@ ipv4_connected: goto out; } - reuseport_has_conns(sk, true); + reuseport_has_conns_set(sk); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); out: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9b504bf49214..514e6a55959f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -179,7 +179,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); /* Fall back to scoring if group has connections */ - if (result && !reuseport_has_conns(sk, false)) + if (result && !reuseport_has_conns(sk)) return result; result = result ? 
: sk; From 7d51b4c67cfb95a069ccbe52f13963bfd9fe85b0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 30 Oct 2022 09:41:19 +0100 Subject: [PATCH 0469/1477] Linux 5.10.152 Link: https://lore.kernel.org/r/20221027165054.270676357@linuxfoundation.org Tested-by: Sudip Mukherjee Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/r/20221028120232.344548477@linuxfoundation.org Tested-by: Rudi Heitbaum Tested-by: Pavel Machek (CIP) Tested-by: Sudip Mukherjee Tested-by: Florian Fainelli Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Tested-by: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8d63b1bff690..a0750d051982 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 151 +SUBLEVEL = 152 EXTRAVERSION = NAME = Dare mighty things From 370be31cde501179b7c7a295732b0105274b3f58 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 27 Oct 2022 17:12:37 +0800 Subject: [PATCH 0470/1477] can: j1939: transport: j1939_session_skb_drop_old(): spin_unlock_irqrestore() before kfree_skb() commit c3c06c61890da80494bb196f75d89b791adda87f upstream. It is not allowed to call kfree_skb() from hardware interrupt context or with interrupts being disabled. The skb is unlinked from the queue, so it can be freed after spin_unlock_irqrestore(). 
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Yang Yingliang Acked-by: Oleksij Rempel Link: https://lore.kernel.org/all/20221027091237.2290111-1-yangyingliang@huawei.com Cc: stable@vger.kernel.org [mkl: adjust subject] Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/j1939/transport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 2830a12a4dd1..78f6a9110699 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -338,10 +338,12 @@ static void j1939_session_skb_drop_old(struct j1939_session *session) __skb_unlink(do_skb, &session->skb_queue); /* drop ref taken in j1939_session_skb_queue() */ skb_unref(do_skb); + spin_unlock_irqrestore(&session->skb_queue.lock, flags); kfree_skb(do_skb); + } else { + spin_unlock_irqrestore(&session->skb_queue.lock, flags); } - spin_unlock_irqrestore(&session->skb_queue.lock, flags); } void j1939_session_skb_queue(struct j1939_session *session, From ca1034bff85a0cde000038e5af72756994f31560 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 10 Oct 2022 20:52:27 +0200 Subject: [PATCH 0471/1477] can: kvaser_usb: Fix possible completions during init_completion commit 2871edb32f4622c3a25ce4b3977bad9050b91974 upstream. kvaser_usb uses completions to signal when a response event is received for outgoing commands. However, it uses init_completion() to reinitialize the start_comp and stop_comp completions before sending the start/stop commands. In case the device sends the corresponding response just before the actual command is sent, complete() may be called concurrently with init_completion() which is not safe. This might be triggerable even with a properly functioning device by stopping the interface (CMD_STOP_CHIP) just after it goes bus-off (which also causes the driver to send CMD_STOP_CHIP when restart-ms is off), but that was not tested. 
Fix the issue by using reinit_completion() instead. Fixes: 080f40a6fa28 ("can: kvaser_usb: Add support for Kvaser CAN/USB devices") Tested-by: Jimmy Assarsson Signed-off-by: Anssi Hannula Signed-off-by: Jimmy Assarsson Link: https://lore.kernel.org/all/20221010185237.319219-2-extja@kvaser.com Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 4 ++-- drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index 5d642458bac5..45d278724883 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -1845,7 +1845,7 @@ static int kvaser_usb_hydra_start_chip(struct kvaser_usb_net_priv *priv) { int err; - init_completion(&priv->start_comp); + reinit_completion(&priv->start_comp); err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_START_CHIP_REQ, priv->channel); @@ -1863,7 +1863,7 @@ static int kvaser_usb_hydra_stop_chip(struct kvaser_usb_net_priv *priv) { int err; - init_completion(&priv->stop_comp); + reinit_completion(&priv->stop_comp); /* Make sure we do not report invalid BUS_OFF from CMD_CHIP_STATE_EVENT * see comment in kvaser_usb_hydra_update_state() diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index 78d52a5e8fd5..15380cc08ee6 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -1324,7 +1324,7 @@ static int kvaser_usb_leaf_start_chip(struct kvaser_usb_net_priv *priv) { int err; - init_completion(&priv->start_comp); + reinit_completion(&priv->start_comp); err = kvaser_usb_leaf_send_simple_cmd(priv->dev, CMD_START_CHIP, priv->channel); @@ -1342,7 +1342,7 @@ static int kvaser_usb_leaf_stop_chip(struct 
kvaser_usb_net_priv *priv) { int err; - init_completion(&priv->stop_comp); + reinit_completion(&priv->stop_comp); err = kvaser_usb_leaf_send_simple_cmd(priv->dev, CMD_STOP_CHIP, priv->channel); From 2bf5b16315698f459dfb7bcfe34a428f7ce9dac6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 26 Oct 2022 23:12:36 -0400 Subject: [PATCH 0472/1477] ALSA: Use del_timer_sync() before freeing timer commit f0a868788fcbf63cdab51f5adcf73b271ede8164 upstream. The current code for freeing the emux timer is extremely dangerous: CPU0 CPU1 ---- ---- snd_emux_timer_callback() snd_emux_free() spin_lock(&emu->voice_lock) del_timer(&emu->tlist); <-- returns immediately spin_unlock(&emu->voice_lock); [..] kfree(emu); spin_lock(&emu->voice_lock); [BOOM!] Instead just use del_timer_sync() which will wait for the timer to finish before continuing. No need to check if the timer is active or not when doing so. This doesn't fix the race of a possible re-arming of the timer, but at least it won't use the data that has just been freed. [ Fixed unused variable warning by tiwai ] Cc: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Steven Rostedt (Google) Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221026231236.6834b551@gandalf.local.home Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/synth/emux/emux.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sound/synth/emux/emux.c b/sound/synth/emux/emux.c index 6695530bba9b..c60ff81390a4 100644 --- a/sound/synth/emux/emux.c +++ b/sound/synth/emux/emux.c @@ -125,15 +125,10 @@ EXPORT_SYMBOL(snd_emux_register); */ int snd_emux_free(struct snd_emux *emu) { - unsigned long flags; - if (! 
emu) return -EINVAL; - spin_lock_irqsave(&emu->voice_lock, flags); - if (emu->timer_active) - del_timer(&emu->tlist); - spin_unlock_irqrestore(&emu->voice_lock, flags); + del_timer_sync(&emu->tlist); snd_emux_proc_free(emu); snd_emux_delete_virmidi(emu); From 8959092300081d01670829835f854ca8d355bd75 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 24 Oct 2022 18:29:29 +0200 Subject: [PATCH 0473/1477] ALSA: au88x0: use explicitly signed char commit ee03c0f200eb0d9f22dd8732d9fb7956d91019c2 upstream. With char becoming unsigned by default, and with `char` alone being ambiguous and based on architecture, signed chars need to be marked explicitly as such. This fixes warnings like: sound/pci/au88x0/au88x0_core.c:2029 vortex_adb_checkinout() warn: signedness bug returning '(-22)' sound/pci/au88x0/au88x0_core.c:2046 vortex_adb_checkinout() warn: signedness bug returning '(-12)' sound/pci/au88x0/au88x0_core.c:2125 vortex_adb_allocroute() warn: 'vortex_adb_checkinout(vortex, (0), en, 0)' is unsigned sound/pci/au88x0/au88x0_core.c:2170 vortex_adb_allocroute() warn: 'vortex_adb_checkinout(vortex, stream->resources, en, 4)' is unsigned As well, since one function returns errnos, return an `int` rather than a `signed char`. Signed-off-by: Jason A. Donenfeld Cc: Link: https://lore.kernel.org/r/20221024162929.536004-1-Jason@zx2c4.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/au88x0/au88x0.h | 6 +++--- sound/pci/au88x0/au88x0_core.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/au88x0/au88x0.h b/sound/pci/au88x0/au88x0.h index 0aa7af049b1b..6cbb2bc4a048 100644 --- a/sound/pci/au88x0/au88x0.h +++ b/sound/pci/au88x0/au88x0.h @@ -141,7 +141,7 @@ struct snd_vortex { #ifndef CHIP_AU8810 stream_t dma_wt[NR_WT]; wt_voice_t wt_voice[NR_WT]; /* WT register cache. 
*/ - char mixwt[(NR_WT / NR_WTPB) * 6]; /* WT mixin objects */ + s8 mixwt[(NR_WT / NR_WTPB) * 6]; /* WT mixin objects */ #endif /* Global resources */ @@ -235,8 +235,8 @@ static int vortex_alsafmt_aspfmt(snd_pcm_format_t alsafmt, vortex_t *v); static void vortex_connect_default(vortex_t * vortex, int en); static int vortex_adb_allocroute(vortex_t * vortex, int dma, int nr_ch, int dir, int type, int subdev); -static char vortex_adb_checkinout(vortex_t * vortex, int resmap[], int out, - int restype); +static int vortex_adb_checkinout(vortex_t * vortex, int resmap[], int out, + int restype); #ifndef CHIP_AU8810 static int vortex_wt_allocroute(vortex_t * vortex, int dma, int nr_ch); static void vortex_wt_connect(vortex_t * vortex, int en); diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c index 5180f1bd1326..0b04436ac017 100644 --- a/sound/pci/au88x0/au88x0_core.c +++ b/sound/pci/au88x0/au88x0_core.c @@ -1998,7 +1998,7 @@ static const int resnum[VORTEX_RESOURCE_LAST] = out: Mean checkout if != 0. Else mean Checkin resource. restype: Indicates type of resource to be checked in or out. */ -static char +static int vortex_adb_checkinout(vortex_t * vortex, int resmap[], int out, int restype) { int i, qty = resnum[restype], resinuse = 0; From 4cc7a360ec3bea1a10a1256a21c1c00fc32aaa2f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 02:03:13 +0200 Subject: [PATCH 0474/1477] ALSA: rme9652: use explicitly signed char commit 50895a55bcfde8ac6f22a37c6bc8cff506b3c7c6 upstream. With char becoming unsigned by default, and with `char` alone being ambiguous and based on architecture, signed chars need to be marked explicitly as such. 
This fixes warnings like: sound/pci/rme9652/hdsp.c:3953 hdsp_channel_buffer_location() warn: 'hdsp->channel_map[channel]' is unsigned sound/pci/rme9652/hdsp.c:4153 snd_hdsp_channel_info() warn: impossible condition '(hdsp->channel_map[channel] < 0) => (0-255 < 0)' sound/pci/rme9652/rme9652.c:1833 rme9652_channel_buffer_location() warn: 'rme9652->channel_map[channel]' is unsigned Signed-off-by: Jason A. Donenfeld Cc: Link: https://lore.kernel.org/r/20221025000313.546261-1-Jason@zx2c4.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/rme9652/hdsp.c | 26 +++++++++++++------------- sound/pci/rme9652/rme9652.c | 22 +++++++++++----------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 4aee30db034d..954347424500 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -436,7 +436,7 @@ struct hdsp_midi { struct snd_rawmidi *rmidi; struct snd_rawmidi_substream *input; struct snd_rawmidi_substream *output; - char istimer; /* timer in use */ + signed char istimer; /* timer in use */ struct timer_list timer; spinlock_t lock; int pending; @@ -479,7 +479,7 @@ struct hdsp { pid_t playback_pid; int running; int system_sample_rate; - const char *channel_map; + const signed char *channel_map; int dev; int irq; unsigned long port; @@ -501,7 +501,7 @@ struct hdsp { where the data for that channel can be read/written from/to. 
*/ -static const char channel_map_df_ss[HDSP_MAX_CHANNELS] = { +static const signed char channel_map_df_ss[HDSP_MAX_CHANNELS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 }; @@ -516,7 +516,7 @@ static const char channel_map_mf_ss[HDSP_MAX_CHANNELS] = { /* Multiface */ -1, -1, -1, -1, -1, -1, -1, -1 }; -static const char channel_map_ds[HDSP_MAX_CHANNELS] = { +static const signed char channel_map_ds[HDSP_MAX_CHANNELS] = { /* ADAT channels are remapped */ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, /* channels 12 and 13 are S/PDIF */ @@ -525,7 +525,7 @@ static const char channel_map_ds[HDSP_MAX_CHANNELS] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; -static const char channel_map_H9632_ss[HDSP_MAX_CHANNELS] = { +static const signed char channel_map_H9632_ss[HDSP_MAX_CHANNELS] = { /* ADAT channels */ 0, 1, 2, 3, 4, 5, 6, 7, /* SPDIF */ @@ -539,7 +539,7 @@ static const char channel_map_H9632_ss[HDSP_MAX_CHANNELS] = { -1, -1 }; -static const char channel_map_H9632_ds[HDSP_MAX_CHANNELS] = { +static const signed char channel_map_H9632_ds[HDSP_MAX_CHANNELS] = { /* ADAT */ 1, 3, 5, 7, /* SPDIF */ @@ -553,7 +553,7 @@ static const char channel_map_H9632_ds[HDSP_MAX_CHANNELS] = { -1, -1, -1, -1, -1, -1 }; -static const char channel_map_H9632_qs[HDSP_MAX_CHANNELS] = { +static const signed char channel_map_H9632_qs[HDSP_MAX_CHANNELS] = { /* ADAT is disabled in this mode */ /* SPDIF */ 8, 9, @@ -3869,7 +3869,7 @@ static snd_pcm_uframes_t snd_hdsp_hw_pointer(struct snd_pcm_substream *substream return hdsp_hw_pointer(hdsp); } -static char *hdsp_channel_buffer_location(struct hdsp *hdsp, +static signed char *hdsp_channel_buffer_location(struct hdsp *hdsp, int stream, int channel) @@ -3893,7 +3893,7 @@ static int snd_hdsp_playback_copy(struct snd_pcm_substream *substream, void __user *src, unsigned long count) { struct hdsp *hdsp = snd_pcm_substream_chip(substream); - char *channel_buf; + signed char *channel_buf; if 
(snd_BUG_ON(pos + count > HDSP_CHANNEL_BUFFER_BYTES)) return -EINVAL; @@ -3911,7 +3911,7 @@ static int snd_hdsp_playback_copy_kernel(struct snd_pcm_substream *substream, void *src, unsigned long count) { struct hdsp *hdsp = snd_pcm_substream_chip(substream); - char *channel_buf; + signed char *channel_buf; channel_buf = hdsp_channel_buffer_location(hdsp, substream->pstr->stream, channel); if (snd_BUG_ON(!channel_buf)) @@ -3925,7 +3925,7 @@ static int snd_hdsp_capture_copy(struct snd_pcm_substream *substream, void __user *dst, unsigned long count) { struct hdsp *hdsp = snd_pcm_substream_chip(substream); - char *channel_buf; + signed char *channel_buf; if (snd_BUG_ON(pos + count > HDSP_CHANNEL_BUFFER_BYTES)) return -EINVAL; @@ -3943,7 +3943,7 @@ static int snd_hdsp_capture_copy_kernel(struct snd_pcm_substream *substream, void *dst, unsigned long count) { struct hdsp *hdsp = snd_pcm_substream_chip(substream); - char *channel_buf; + signed char *channel_buf; channel_buf = hdsp_channel_buffer_location(hdsp, substream->pstr->stream, channel); if (snd_BUG_ON(!channel_buf)) @@ -3957,7 +3957,7 @@ static int snd_hdsp_hw_silence(struct snd_pcm_substream *substream, unsigned long count) { struct hdsp *hdsp = snd_pcm_substream_chip(substream); - char *channel_buf; + signed char *channel_buf; channel_buf = hdsp_channel_buffer_location (hdsp, substream->pstr->stream, channel); if (snd_BUG_ON(!channel_buf)) diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c index 8def24673f35..459696844b8c 100644 --- a/sound/pci/rme9652/rme9652.c +++ b/sound/pci/rme9652/rme9652.c @@ -229,7 +229,7 @@ struct snd_rme9652 { int last_spdif_sample_rate; /* so that we can catch externally ... */ int last_adat_sample_rate; /* ... induced rate changes */ - const char *channel_map; + const signed char *channel_map; struct snd_card *card; struct snd_pcm *pcm; @@ -246,12 +246,12 @@ struct snd_rme9652 { where the data for that channel can be read/written from/to. 
*/ -static const char channel_map_9652_ss[26] = { +static const signed char channel_map_9652_ss[26] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 }; -static const char channel_map_9636_ss[26] = { +static const signed char channel_map_9636_ss[26] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* channels 16 and 17 are S/PDIF */ 24, 25, @@ -259,7 +259,7 @@ static const char channel_map_9636_ss[26] = { -1, -1, -1, -1, -1, -1, -1, -1 }; -static const char channel_map_9652_ds[26] = { +static const signed char channel_map_9652_ds[26] = { /* ADAT channels are remapped */ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, /* channels 12 and 13 are S/PDIF */ @@ -268,7 +268,7 @@ static const char channel_map_9652_ds[26] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; -static const char channel_map_9636_ds[26] = { +static const signed char channel_map_9636_ds[26] = { /* ADAT channels are remapped */ 1, 3, 5, 7, 9, 11, 13, 15, /* channels 8 and 9 are S/PDIF */ @@ -1841,7 +1841,7 @@ static snd_pcm_uframes_t snd_rme9652_hw_pointer(struct snd_pcm_substream *substr return rme9652_hw_pointer(rme9652); } -static char *rme9652_channel_buffer_location(struct snd_rme9652 *rme9652, +static signed char *rme9652_channel_buffer_location(struct snd_rme9652 *rme9652, int stream, int channel) @@ -1869,7 +1869,7 @@ static int snd_rme9652_playback_copy(struct snd_pcm_substream *substream, void __user *src, unsigned long count) { struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream); - char *channel_buf; + signed char *channel_buf; if (snd_BUG_ON(pos + count > RME9652_CHANNEL_BUFFER_BYTES)) return -EINVAL; @@ -1889,7 +1889,7 @@ static int snd_rme9652_playback_copy_kernel(struct snd_pcm_substream *substream, void *src, unsigned long count) { struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream); - char *channel_buf; + signed char *channel_buf; channel_buf = rme9652_channel_buffer_location(rme9652, substream->pstr->stream, 
@@ -1905,7 +1905,7 @@ static int snd_rme9652_capture_copy(struct snd_pcm_substream *substream, void __user *dst, unsigned long count) { struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream); - char *channel_buf; + signed char *channel_buf; if (snd_BUG_ON(pos + count > RME9652_CHANNEL_BUFFER_BYTES)) return -EINVAL; @@ -1925,7 +1925,7 @@ static int snd_rme9652_capture_copy_kernel(struct snd_pcm_substream *substream, void *dst, unsigned long count) { struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream); - char *channel_buf; + signed char *channel_buf; channel_buf = rme9652_channel_buffer_location(rme9652, substream->pstr->stream, @@ -1941,7 +1941,7 @@ static int snd_rme9652_hw_silence(struct snd_pcm_substream *substream, unsigned long count) { struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream); - char *channel_buf; + signed char *channel_buf; channel_buf = rme9652_channel_buffer_location (rme9652, substream->pstr->stream, From c29fcef5791d4c2782696f5a59316b3d92248c57 Mon Sep 17 00:00:00 2001 From: Hannu Hartikainen Date: Mon, 19 Sep 2022 20:16:10 +0300 Subject: [PATCH 0475/1477] USB: add RESET_RESUME quirk for NVIDIA Jetson devices in RCM commit fc4ade55c617dc73c7e9756b57f3230b4ff24540 upstream. NVIDIA Jetson devices in Force Recovery mode (RCM) do not support suspending, ie. flashing fails if the device has been suspended. The devices are still visible in lsusb and seem to work otherwise, making the issue hard to debug. This has been discovered in various forum posts, eg. [1]. The patch has been tested on NVIDIA Jetson AGX Xavier, but I'm adding all the Jetson models listed in [2] on the assumption that they all behave similarly. 
[1]: https://forums.developer.nvidia.com/t/flashing-not-working/72365 [2]: https://docs.nvidia.com/jetson/archives/l4t-archived/l4t-3271/index.html#page/Tegra%20Linux%20Driver%20Package%20Development%20Guide/quick_start.html Signed-off-by: Hannu Hartikainen Cc: stable # after 6.1-rc3 Link: https://lore.kernel.org/r/20220919171610.30484-1-hannu@hrtk.in Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 03473e20e218..eb3ea45d5d13 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -388,6 +388,15 @@ static const struct usb_device_id usb_quirk_list[] = { /* Kingston DataTraveler 3.0 */ { USB_DEVICE(0x0951, 0x1666), .driver_info = USB_QUIRK_NO_LPM }, + /* NVIDIA Jetson devices in Force Recovery mode */ + { USB_DEVICE(0x0955, 0x7018), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x0955, 0x7019), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x0955, 0x7418), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x0955, 0x7721), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x0955, 0x7c18), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x0955, 0x7e19), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x0955, 0x7f21), .driver_info = USB_QUIRK_RESET_RESUME }, + /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */ { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF }, From fb074d622ccc7e3999cde47f18ea7f8970b09d11 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 25 Oct 2022 15:10:14 -0700 Subject: [PATCH 0476/1477] usb: dwc3: gadget: Stop processing more requests on IMI commit f78961f8380b940e0cfc7e549336c21a2ad44f4d upstream. When servicing a transfer completion event, the dwc3 driver will reclaim TRBs of started requests up to the request associated with the interrupt event. 
Currently we don't check for interrupt due to missed isoc, and the driver may attempt to reclaim TRBs beyond the associated event. This causes invalid memory access when the hardware still owns the TRB. If there's a missed isoc TRB with IMI (interrupt on missed isoc), make sure to stop servicing further. Note that only the last TRB of chained TRBs has its status updated with missed isoc. Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Cc: stable@vger.kernel.org Reported-by: Jeff Vanhoof Reported-by: Dan Vacura Signed-off-by: Thinh Nguyen Reviewed-by: Jeff Vanhoof Tested-by: Jeff Vanhoof Link: https://lore.kernel.org/r/b29acbeab531b666095dfdafd8cb5c7654fbb3e1.1666735451.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 41ed2f6f8a8d..ff84ee8d719a 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2800,6 +2800,10 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep, if (event->status & DEPEVT_STATUS_SHORT && !chain) return 1; + if ((trb->ctrl & DWC3_TRB_CTRL_ISP_IMI) && + DWC3_TRB_SIZE_TRBSTS(trb->size) == DWC3_TRBSTS_MISSED_ISOC) + return 1; + if ((trb->ctrl & DWC3_TRB_CTRL_IOC) || (trb->ctrl & DWC3_TRB_CTRL_LST)) return 1; From e440957f9c8bedae784f718c42207af222c25818 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 25 Oct 2022 15:10:20 -0700 Subject: [PATCH 0477/1477] usb: dwc3: gadget: Don't set IMI for no_interrupt commit 308c316d16cbad99bb834767382baa693ac42169 upstream. The gadget driver may have a certain expectation of how the request completion flow should be, based on its configuration. Make sure the controller driver respects that. That is, don't set IMI (Interrupt on Missed Isoc) when usb_request->no_interrupt is set. Also, the driver should only set IMI to the last TRB of a chain.
Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Reviewed-by: Jeff Vanhoof Tested-by: Jeff Vanhoof Link: https://lore.kernel.org/r/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index ff84ee8d719a..347ba7e4bd81 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1064,8 +1064,8 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep, trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS; } - /* always enable Interrupt on Missed ISOC */ - trb->ctrl |= DWC3_TRB_CTRL_ISP_IMI; + if (!no_interrupt && !chain) + trb->ctrl |= DWC3_TRB_CTRL_ISP_IMI; break; case USB_ENDPOINT_XFER_BULK: From 2bc4f99ee24391bc05f0f464c4414ab0ad7b4f74 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 5 Oct 2022 12:13:55 -0700 Subject: [PATCH 0478/1477] usb: bdc: change state when port disconnected commit fb8f60dd1b67520e0e0d7978ef17d015690acfc1 upstream. When port is connected and then disconnected, the state stays as configured. Which is incorrect as the port is no longer configured, but in a not attached state. 
Signed-off-by: Justin Chen Acked-by: Florian Fainelli Fixes: efed421a94e6 ("usb: gadget: Add UDC driver for Broadcom USB3.0 device controller IP BDC") Cc: stable Link: https://lore.kernel.org/r/1664997235-18198-1-git-send-email-justinpopo6@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/bdc/bdc_udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/bdc/bdc_udc.c b/drivers/usb/gadget/udc/bdc/bdc_udc.c index 248426a3e88a..5f0b3fd93631 100644 --- a/drivers/usb/gadget/udc/bdc/bdc_udc.c +++ b/drivers/usb/gadget/udc/bdc/bdc_udc.c @@ -151,6 +151,7 @@ static void bdc_uspc_disconnected(struct bdc *bdc, bool reinit) bdc->delayed_status = false; bdc->reinit = reinit; bdc->test_mode = false; + usb_gadget_set_state(&bdc->gadget, USB_STATE_NOTATTACHED); } /* TNotify wkaeup timer */ From 228101fc832f0330f266e1f4002bd969a3be7850 Mon Sep 17 00:00:00 2001 From: Jens Glathe Date: Mon, 24 Oct 2022 17:27:17 +0300 Subject: [PATCH 0479/1477] usb: xhci: add XHCI_SPURIOUS_SUCCESS to ASM1042 despite being a V0.96 controller commit 4f547472380136718b56064ea5689a61e135f904 upstream. This appears to fix the error: "xhci_hcd
; ERROR Transfer event TRB DMA ptr not part of current TD ep_index 2 comp_code 13" that appear spuriously (or pretty often) when using a r8152 USB3 ethernet adapter with integrated hub. ASM1042 reports as a 0.96 controller, but appears to behave more like 1.0 Inspired by this email thread: https://markmail.org/thread/7vzqbe7t6du6qsw3 Cc: stable@vger.kernel.org Signed-off-by: Jens Glathe Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20221024142720.4122053-2-mathias.nyman@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 8952492d43be..1aa4bf7c0f6f 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -302,8 +302,14 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && - pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) + pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) { + /* + * try to tame the ASMedia 1042 controller which reports 0.96 + * but appears to behave more like 1.0 + */ + xhci->quirks |= XHCI_SPURIOUS_SUCCESS; xhci->quirks |= XHCI_BROKEN_STREAMS; + } if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI) { xhci->quirks |= XHCI_TRUST_TX_LENGTH; From 63c7df3c818ef2a1b56977968adf1d37fe6e691b Mon Sep 17 00:00:00 2001 From: Tony O'Brien Date: Tue, 27 Sep 2022 15:47:28 +1300 Subject: [PATCH 0480/1477] mtd: rawnand: marvell: Use correct logic for nand-keep-config commit ce107713b722af57c4b7f2477594d445b496420e upstream. Originally the absence of the marvell,nand-keep-config property caused the setup_data_interface function to be provided. However when setup_data_interface was moved into nand_controller_ops the logic was unintentionally inverted. Update the logic so that only if the marvell,nand-keep-config property is present the bootloader NAND config kept. 
Cc: stable@vger.kernel.org Fixes: 7a08dbaedd36 ("mtd: rawnand: Move ->setup_data_interface() to nand_controller_ops") Signed-off-by: Tony O'Brien Signed-off-by: Chris Packham Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220927024728.28447-1-chris.packham@alliedtelesis.co.nz Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/marvell_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index d00c916f133b..dce35f81e0a5 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -2672,7 +2672,7 @@ static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc, chip->controller = &nfc->controller; nand_set_flash_node(chip, np); - if (!of_property_read_bool(np, "marvell,nand-keep-config")) + if (of_property_read_bool(np, "marvell,nand-keep-config")) chip->options |= NAND_KEEP_TIMINGS; mtd = nand_to_mtd(chip); From 3b250824b6d3f109c8c18652280f7120c3803545 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 24 Oct 2022 17:27:18 +0300 Subject: [PATCH 0481/1477] xhci: Add quirk to reset host back to default state at shutdown commit 34cd2db408d591bc15771cbcc90939ade0a99a21 upstream. Systems based on Alder Lake P see significant boot time delay if boot firmware tries to control usb ports in unexpected link states. This is seen with self-powered usb devices that survive in U3 link suspended state over S5. A more generic solution to power off ports at shutdown was attempted in commit 83810f84ecf1 ("xhci: turn off port power in shutdown") but it caused regression. Add host specific XHCI_RESET_TO_DEFAULT quirk which will reset host and ports back to default state in shutdown. 
Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20221024142720.4122053-3-mathias.nyman@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 ++++ drivers/usb/host/xhci.c | 10 ++++++++-- drivers/usb/host/xhci.h | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 1aa4bf7c0f6f..64d5a593682b 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -253,6 +253,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) xhci->quirks |= XHCI_MISSING_CAS; + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI) + xhci->quirks |= XHCI_RESET_TO_DEFAULT; + if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_XHCI || diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 8918e6ae5c4b..c968dd865314 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -794,9 +794,15 @@ void xhci_shutdown(struct usb_hcd *hcd) spin_lock_irq(&xhci->lock); xhci_halt(xhci); - /* Workaround for spurious wakeups at shutdown with HSW */ - if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) + + /* + * Workaround for spurious wakeps at shutdown with HSW, and for boot + * firmware delay in ADL-P PCH if port are left in U3 at shutdown + */ + if (xhci->quirks & XHCI_SPURIOUS_WAKEUP || + xhci->quirks & XHCI_RESET_TO_DEFAULT) xhci_reset(xhci, XHCI_RESET_SHORT_USEC); + spin_unlock_irq(&xhci->lock); xhci_cleanup_msix(xhci); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index e668740000b2..059050f13522 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1889,6 +1889,7 @@ struct xhci_hcd { #define XHCI_NO_SOFT_RETRY BIT_ULL(40) #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(42) #define 
XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43) +#define XHCI_RESET_TO_DEFAULT BIT_ULL(44) unsigned int num_active_eps; unsigned int limit_active_eps; From 678d2cc2041cc6ce05030852dce9ad42719abcfc Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 24 Oct 2022 17:27:20 +0300 Subject: [PATCH 0482/1477] xhci: Remove device endpoints from bandwidth list when freeing the device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5aed5b7c2430ce318a8e62f752f181e66f0d1053 upstream. Endpoints are normally deleted from the bandwidth list when they are dropped, before the virt device is freed. If xHC host is dying or being removed then the endpoints aren't dropped cleanly due to functions returning early to avoid interacting with a non-accessible host controller. So check and delete endpoints that are still on the bandwidth list when freeing the virt device. Solves a list_del corruption kernel crash when unbinding xhci-pci, caused by xhci_mem_cleanup() when it later tried to delete already freed endpoints from the bandwidth list. 
This only affects hosts that use software bandwidth checking, which currently is only the xHC in Intel Panther Point PCH (Ivy Bridge) Cc: stable@vger.kernel.org Reported-by: Marek Marczykowski-Górecki Tested-by: Marek Marczykowski-Górecki Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20221024142720.4122053-5-mathias.nyman@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 1fba5605a88e..d1a42300ae58 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -915,15 +915,19 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) if (dev->eps[i].stream_info) xhci_free_stream_info(xhci, dev->eps[i].stream_info); - /* Endpoints on the TT/root port lists should have been removed - * when usb_disable_device() was called for the device. - * We can't drop them anyway, because the udev might have gone - * away by this point, and we can't tell what speed it was. + /* + * Endpoints are normally deleted from the bandwidth list when + * endpoints are dropped, before device is freed. + * If host is dying or being removed then endpoints aren't + * dropped cleanly, so delete the endpoint from list here. + * Only applicable for hosts with software bandwidth checking.
*/ - if (!list_empty(&dev->eps[i].bw_endpoint_list)) - xhci_warn(xhci, "Slot %u endpoint %u " - "not removed from BW list!\n", - slot_id, i); + + if (!list_empty(&dev->eps[i].bw_endpoint_list)) { + list_del_init(&dev->eps[i].bw_endpoint_list); + xhci_dbg(xhci, "Slot %u endpoint %u not removed from BW list!\n", + slot_id, i); + } } /* If this is a hub, free the TT(s) from the TT list */ xhci_free_tt_info(xhci, dev, slot_id); From 90ff5bef2bc7e55862df3c623175a9ee125e7715 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 13 Oct 2022 15:04:04 +0300 Subject: [PATCH 0483/1477] tools: iio: iio_utils: fix digit calculation commit 72b2aa38191bcba28389b0e20bf6b4f15017ff2b upstream. The iio_utils uses a digit calculation in order to know length of the file name containing a buffer number. The digit calculation does not work for number 0. This leads to allocation of one character too small buffer for the file-name when file name contains value '0'. (Eg. buffer0). Fix digit calculation by returning one digit to be present for number '0'. Fixes: 096f9b862e60 ("tools:iio:iio_utils: implement digit calculation") Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/Y0f+tKCz+ZAIoroQ@dc75zzyyyyyyyyyyyyycy-3.rev.dnainternet.fi Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- tools/iio/iio_utils.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c index 7399eb7f1378..d66b18c54606 100644 --- a/tools/iio/iio_utils.c +++ b/tools/iio/iio_utils.c @@ -543,6 +543,10 @@ static int calc_digits(int num) { int count = 0; + /* It takes a digit to represent zero */ + if (!num) + return 1; + while (num != 0) { num /= 10; count++; From af236da8552ecbe0ecb779411611826e87d7a831 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Fri, 26 Aug 2022 17:53:52 +0530 Subject: [PATCH 0484/1477] iio: light: tsl2583: Fix module unloading commit 0dec4d2f2636b9e54d9d29f17afc7687c5407f78 upstream. 
tsl2583 probe() uses devm_iio_device_register() and calling iio_device_unregister() causes the unregister to occur twice. Switch to iio_device_register() instead of devm_iio_device_register() in probe to avoid the device managed cleanup. Fixes: 371894f5d1a0 ("iio: tsl2583: add runtime power management support") Signed-off-by: Shreeya Patel Link: https://lore.kernel.org/r/20220826122352.288438-1-shreeya.patel@collabora.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/light/tsl2583.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/tsl2583.c b/drivers/iio/light/tsl2583.c index 40b7dd266b31..e39d512145a6 100644 --- a/drivers/iio/light/tsl2583.c +++ b/drivers/iio/light/tsl2583.c @@ -856,7 +856,7 @@ static int tsl2583_probe(struct i2c_client *clientp, TSL2583_POWER_OFF_DELAY_MS); pm_runtime_use_autosuspend(&clientp->dev); - ret = devm_iio_device_register(indio_dev->dev.parent, indio_dev); + ret = iio_device_register(indio_dev); if (ret) { dev_err(&clientp->dev, "%s: iio registration failed\n", __func__); From f19f1a75d378c2d87d98a02e6cb9ffcdc5c9af73 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Fri, 14 Oct 2022 15:37:22 +0300 Subject: [PATCH 0485/1477] iio: temperature: ltc2983: allocate iio channels once commit 4132f19173211856d35180958d2754f5c56d520a upstream. Currently, every time the device wakes up from sleep, the iio_chan array is reallocated, leaking the previous one until the device is removed (basically never). Move the allocation to the probe function to avoid this.
Signed-off-by: Cosmin Tanislav Fixes: f110f3188e563 ("iio: temperature: Add support for LTC2983") Cc: Link: https://lore.kernel.org/r/20221014123724.1401011-2-demonsingur@gmail.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/temperature/ltc2983.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/iio/temperature/ltc2983.c b/drivers/iio/temperature/ltc2983.c index 3b4a0e60e605..8306daa77908 100644 --- a/drivers/iio/temperature/ltc2983.c +++ b/drivers/iio/temperature/ltc2983.c @@ -1376,13 +1376,6 @@ static int ltc2983_setup(struct ltc2983_data *st, bool assign_iio) return ret; } - st->iio_chan = devm_kzalloc(&st->spi->dev, - st->iio_channels * sizeof(*st->iio_chan), - GFP_KERNEL); - - if (!st->iio_chan) - return -ENOMEM; - ret = regmap_update_bits(st->regmap, LTC2983_GLOBAL_CONFIG_REG, LTC2983_NOTCH_FREQ_MASK, LTC2983_NOTCH_FREQ(st->filter_notch_freq)); @@ -1494,6 +1487,12 @@ static int ltc2983_probe(struct spi_device *spi) if (ret) return ret; + st->iio_chan = devm_kzalloc(&spi->dev, + st->iio_channels * sizeof(*st->iio_chan), + GFP_KERNEL); + if (!st->iio_chan) + return -ENOMEM; + ret = ltc2983_setup(st, true); if (ret) return ret; From d9ddfeb01fb95ffbbc7031d46a5ee2a5e45cbb86 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Thu, 20 Oct 2022 18:15:44 -0700 Subject: [PATCH 0486/1477] fbdev: smscufx: Fix several use-after-free bugs commit cc67482c9e5f2c80d62f623bcc347c29f9f648e1 upstream. Several types of UAFs can occur when physically removing a USB device. Adds ufx_ops_destroy() function to .fb_destroy of fb_ops, and in this function, there is kref_put() that finally calls ufx_free(). This fix prevents multiple UAFs. 
Signed-off-by: Hyunwoo Kim Link: https://lore.kernel.org/linux-fbdev/20221011153436.GA4446@ubuntu/ Cc: Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/smscufx.c | 57 +++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c index 7673db5da26b..5fa3f1e5dfe8 100644 --- a/drivers/video/fbdev/smscufx.c +++ b/drivers/video/fbdev/smscufx.c @@ -97,7 +97,6 @@ struct ufx_data { struct kref kref; int fb_count; bool virtualized; /* true when physical usb device not present */ - struct delayed_work free_framebuffer_work; atomic_t usb_active; /* 0 = update virtual buffer, but no usb traffic */ atomic_t lost_pixels; /* 1 = a render op failed. Need screen refresh */ u8 *edid; /* null until we read edid from hw or get from sysfs */ @@ -1116,15 +1115,24 @@ static void ufx_free(struct kref *kref) { struct ufx_data *dev = container_of(kref, struct ufx_data, kref); - /* this function will wait for all in-flight urbs to complete */ - if (dev->urbs.count > 0) - ufx_free_urb_list(dev); - - pr_debug("freeing ufx_data %p", dev); - kfree(dev); } +static void ufx_ops_destory(struct fb_info *info) +{ + struct ufx_data *dev = info->par; + int node = info->node; + + /* Assume info structure is freed after this point */ + framebuffer_release(info); + + pr_debug("fb_info for /dev/fb%d has been freed", node); + + /* release reference taken by kref_init in probe() */ + kref_put(&dev->kref, ufx_free); +} + + static void ufx_release_urb_work(struct work_struct *work) { struct urb_node *unode = container_of(work, struct urb_node, @@ -1133,14 +1141,9 @@ static void ufx_release_urb_work(struct work_struct *work) up(&unode->dev->urbs.limit_sem); } -static void ufx_free_framebuffer_work(struct work_struct *work) +static void ufx_free_framebuffer(struct ufx_data *dev) { - struct ufx_data *dev = container_of(work, struct ufx_data, - free_framebuffer_work.work); struct 
fb_info *info = dev->info; - int node = info->node; - - unregister_framebuffer(info); if (info->cmap.len != 0) fb_dealloc_cmap(&info->cmap); @@ -1152,11 +1155,6 @@ static void ufx_free_framebuffer_work(struct work_struct *work) dev->info = NULL; - /* Assume info structure is freed after this point */ - framebuffer_release(info); - - pr_debug("fb_info for /dev/fb%d has been freed", node); - /* ref taken in probe() as part of registering framebfufer */ kref_put(&dev->kref, ufx_free); } @@ -1168,11 +1166,13 @@ static int ufx_ops_release(struct fb_info *info, int user) { struct ufx_data *dev = info->par; + mutex_lock(&disconnect_mutex); + dev->fb_count--; /* We can't free fb_info here - fbmem will touch it when we return */ if (dev->virtualized && (dev->fb_count == 0)) - schedule_delayed_work(&dev->free_framebuffer_work, HZ); + ufx_free_framebuffer(dev); if ((dev->fb_count == 0) && (info->fbdefio)) { fb_deferred_io_cleanup(info); @@ -1185,6 +1185,8 @@ static int ufx_ops_release(struct fb_info *info, int user) kref_put(&dev->kref, ufx_free); + mutex_unlock(&disconnect_mutex); + return 0; } @@ -1291,6 +1293,7 @@ static const struct fb_ops ufx_ops = { .fb_blank = ufx_ops_blank, .fb_check_var = ufx_ops_check_var, .fb_set_par = ufx_ops_set_par, + .fb_destroy = ufx_ops_destory, }; /* Assumes &info->lock held by caller @@ -1672,9 +1675,6 @@ static int ufx_usb_probe(struct usb_interface *interface, goto destroy_modedb; } - INIT_DELAYED_WORK(&dev->free_framebuffer_work, - ufx_free_framebuffer_work); - retval = ufx_reg_read(dev, 0x3000, &id_rev); check_warn_goto_error(retval, "error %d reading 0x3000 register from device", retval); dev_dbg(dev->gdev, "ID_REV register value 0x%08x", id_rev); @@ -1747,10 +1747,12 @@ e_nomem: static void ufx_usb_disconnect(struct usb_interface *interface) { struct ufx_data *dev; + struct fb_info *info; mutex_lock(&disconnect_mutex); dev = usb_get_intfdata(interface); + info = dev->info; pr_debug("USB disconnect starting\n"); @@ -1764,12 +1766,15 @@ 
static void ufx_usb_disconnect(struct usb_interface *interface) /* if clients still have us open, will be freed on last close */ if (dev->fb_count == 0) - schedule_delayed_work(&dev->free_framebuffer_work, 0); + ufx_free_framebuffer(dev); - /* release reference taken by kref_init in probe() */ - kref_put(&dev->kref, ufx_free); + /* this function will wait for all in-flight urbs to complete */ + if (dev->urbs.count > 0) + ufx_free_urb_list(dev); - /* consider ufx_data freed */ + pr_debug("freeing ufx_data %p", dev); + + unregister_framebuffer(info); mutex_unlock(&disconnect_mutex); } From 706215300411d48db6b51a5832b872632a84bbc1 Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Mon, 24 Oct 2022 23:44:21 +0800 Subject: [PATCH 0487/1477] fs/binfmt_elf: Fix memory leak in load_elf_binary() commit 594d2a14f2168c09b13b114c3d457aa939403e52 upstream. There is a memory leak reported by kmemleak: unreferenced object 0xffff88817104ef80 (size 224): comm "xfs_admin", pid 47165, jiffies 4298708825 (age 1333.476s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 60 a8 b3 00 81 88 ff ff a8 10 5a 00 81 88 ff ff `.........Z..... backtrace: [] __alloc_file+0x21/0x250 [] alloc_empty_file+0x41/0xf0 [] path_openat+0xea/0x3d30 [] do_filp_open+0x1b9/0x290 [] do_open_execat+0xce/0x5b0 [] open_exec+0x27/0x50 [] load_elf_binary+0x510/0x3ed0 [] bprm_execve+0x599/0x1240 [] do_execveat_common.isra.0+0x4c7/0x680 [] __x64_sys_execve+0x88/0xb0 [] do_syscall_64+0x35/0x80 If "interp_elf_ex" fails to allocate memory in load_elf_binary(), the program will take the "out_free_ph" error handling path, so the "interpreter" file resource is not released. Fix it by adding an error handling path "out_free_file", which will release the file resource when "interp_elf_ex" failed to allocate memory.
Fixes: 0693ffebcfe5 ("fs/binfmt_elf.c: allocate less for static executable") Signed-off-by: Li Zetao Reviewed-by: Alexey Dobriyan Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221024154421.982230-1-lizetao1@huawei.com Signed-off-by: Greg Kroah-Hartman --- fs/binfmt_elf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 213864bc7e8c..ccc4c6d8a578 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -907,7 +907,7 @@ static int load_elf_binary(struct linux_binprm *bprm) interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL); if (!interp_elf_ex) { retval = -ENOMEM; - goto out_free_ph; + goto out_free_file; } /* Get the exec headers */ @@ -1328,6 +1328,7 @@ out: out_free_dentry: kfree(interp_elf_ex); kfree(interp_elf_phdata); +out_free_file: allow_write_access(interpreter); if (interpreter) fput(interpreter); From 9ba2990f4e8027256e2f24d361e44e987246949f Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Mon, 7 Jun 2021 15:54:27 +0200 Subject: [PATCH 0488/1477] exec: Copy oldsighand->action under spin-lock commit 5bf2fedca8f59379025b0d52f917b9ddb9bfe17e upstream. unshare_sighand should only access oldsighand->action while holding oldsighand->siglock, to make sure that newsighand->action is in a consistent state. 
Signed-off-by: Bernd Edlinger Cc: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/AM8PR10MB470871DEBD1DED081F9CC391E4389@AM8PR10MB4708.EURPRD10.PROD.OUTLOOK.COM Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index b56bc4b4016e..983295c0b8ac 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1198,11 +1198,11 @@ static int unshare_sighand(struct task_struct *me) return -ENOMEM; refcount_set(&newsighand->count, 1); - memcpy(newsighand->action, oldsighand->action, - sizeof(newsighand->action)); write_lock_irq(&tasklist_lock); spin_lock(&oldsighand->siglock); + memcpy(newsighand->action, oldsighand->action, + sizeof(newsighand->action)); rcu_assign_pointer(me->sighand, newsighand); spin_unlock(&oldsighand->siglock); write_unlock_irq(&tasklist_lock); From c368f751da8edfe50c31179ff084e205bcf79ea0 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 20 Oct 2022 16:25:35 +0200 Subject: [PATCH 0489/1477] mac802154: Fix LQI recording commit 5a5c4e06fd03b595542d5590f2bc05a6b7fc5c2b upstream. Back in 2014, the LQI was saved in the skb control buffer (skb->cb, or mac_cb(skb)) without any actual reset of this area prior to its use. As part of a useful rework of the use of this region, 32edc40ae65c ("ieee802154: change _cb handling slightly") introduced mac_cb_init() to basically memset the cb field to 0. In particular, this new function got called at the beginning of mac802154_parse_frame_start(), right before the location where the buffer got actually filled. What went through unnoticed however, is the fact that the very first helper called by device drivers in the receive path already used this area to save the LQI value for later extraction. Resetting the cb field "so late" led to systematically zeroing the LQI. 
If we consider the reset of the cb field needed, we can make it as soon as we get an skb from a device driver, right before storing the LQI, as is the very first time we need to write something there. Cc: stable@vger.kernel.org Fixes: 32edc40ae65c ("ieee802154: change _cb handling slightly") Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20221020142535.1038885-1-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt Signed-off-by: Greg Kroah-Hartman --- net/mac802154/rx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index c439125ef2b9..726b47a4611b 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -132,7 +132,7 @@ static int ieee802154_parse_frame_start(struct sk_buff *skb, struct ieee802154_hdr *hdr) { int hlen; - struct ieee802154_mac_cb *cb = mac_cb_init(skb); + struct ieee802154_mac_cb *cb = mac_cb(skb); skb_reset_mac_header(skb); @@ -294,8 +294,9 @@ void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi) { struct ieee802154_local *local = hw_to_local(hw); + struct ieee802154_mac_cb *cb = mac_cb_init(skb); - mac_cb(skb)->lqi = lqi; + cb->lqi = lqi; skb->pkt_type = IEEE802154_RX_MSG; skb_queue_tail(&local->skb_queue, skb); tasklet_schedule(&local->tasklet); From 44a86d96fac89f9a5b3de50215c1ed41872b5126 Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Tue, 27 Sep 2022 04:59:46 -0700 Subject: [PATCH 0490/1477] scsi: qla2xxx: Use transport-defined speed mask for supported_speeds commit 0b863257c17c5f57a41e0a48de140ed026957a63 upstream. One of the sysfs values reported for supported_speeds was not valid (20Gb/s reported instead of 64Gb/s). Instead of driver internal speed mask definition, use speed mask defined in transport_fc for reporting host->supported_speeds. 
Link: https://lore.kernel.org/r/20220927115946.17559-1-njavali@marvell.com Cc: stable@vger.kernel.org Reviewed-by: Himanshu Madhani Signed-off-by: Manish Rangankar Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_attr.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index d0407f44de78..61b9dc511d90 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -3262,11 +3262,34 @@ struct fc_function_template qla2xxx_transport_vport_functions = { .bsg_timeout = qla24xx_bsg_timeout, }; +static uint +qla2x00_get_host_supported_speeds(scsi_qla_host_t *vha, uint speeds) +{ + uint supported_speeds = FC_PORTSPEED_UNKNOWN; + + if (speeds & FDMI_PORT_SPEED_64GB) + supported_speeds |= FC_PORTSPEED_64GBIT; + if (speeds & FDMI_PORT_SPEED_32GB) + supported_speeds |= FC_PORTSPEED_32GBIT; + if (speeds & FDMI_PORT_SPEED_16GB) + supported_speeds |= FC_PORTSPEED_16GBIT; + if (speeds & FDMI_PORT_SPEED_8GB) + supported_speeds |= FC_PORTSPEED_8GBIT; + if (speeds & FDMI_PORT_SPEED_4GB) + supported_speeds |= FC_PORTSPEED_4GBIT; + if (speeds & FDMI_PORT_SPEED_2GB) + supported_speeds |= FC_PORTSPEED_2GBIT; + if (speeds & FDMI_PORT_SPEED_1GB) + supported_speeds |= FC_PORTSPEED_1GBIT; + + return supported_speeds; +} + void qla2x00_init_host_attr(scsi_qla_host_t *vha) { struct qla_hw_data *ha = vha->hw; - u32 speeds = FC_PORTSPEED_UNKNOWN; + u32 speeds = 0, fdmi_speed = 0; fc_host_dev_loss_tmo(vha->host) = ha->port_down_retry_count; fc_host_node_name(vha->host) = wwn_to_u64(vha->node_name); @@ -3276,7 +3299,8 @@ qla2x00_init_host_attr(scsi_qla_host_t *vha) fc_host_max_npiv_vports(vha->host) = ha->max_npiv_vports; fc_host_npiv_vports_inuse(vha->host) = ha->cur_vport_count; - speeds = qla25xx_fdmi_port_speed_capability(ha); + fdmi_speed = qla25xx_fdmi_port_speed_capability(ha); + 
speeds = qla2x00_get_host_supported_speeds(vha, fdmi_speed); fc_host_supported_speeds(vha->host) = speeds; } From 21c4679af01f1027cb559330c2e7d410089b2b36 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 13 Sep 2022 10:53:13 +0200 Subject: [PATCH 0491/1477] drm/msm/dsi: fix memory corruption with too many bridges commit 2e786eb2f9cebb07e317226b60054df510b60c65 upstream. Add the missing sanity check on the bridge counter to avoid corrupting data beyond the fixed-sized bridge array in case there are ever more than eight bridges. Fixes: a689554ba6ed ("drm/msm: Initial add DSI connector support") Cc: stable@vger.kernel.org # 4.1 Signed-off-by: Johan Hovold Tested-by: Kuogee Hsieh Reviewed-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/502668/ Link: https://lore.kernel.org/r/20220913085320.8577-4-johan+linaro@kernel.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/dsi/dsi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index f845333593da..7377596a13f4 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -205,6 +205,12 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, return -EINVAL; priv = dev->dev_private; + + if (priv->num_bridges == ARRAY_SIZE(priv->bridges)) { + DRM_DEV_ERROR(dev->dev, "too many bridges\n"); + return -ENOSPC; + } + msm_dsi->dev = dev; ret = msm_dsi_host_modeset_init(msm_dsi->host, dev); From 08c7375fa27a8ceee028868e03ffb3a0db919d44 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 13 Sep 2022 10:53:14 +0200 Subject: [PATCH 0492/1477] drm/msm/hdmi: fix memory corruption with too many bridges commit 4c1294da6aed1f16d47a417dcfe6602833c3c95c upstream. Add the missing sanity check on the bridge counter to avoid corrupting data beyond the fixed-sized bridge array in case there are ever more than eight bridges. 
Fixes: a3376e3ec81c ("drm/msm: convert to drm_bridge") Cc: stable@vger.kernel.org # 3.12 Signed-off-by: Johan Hovold Tested-by: Kuogee Hsieh Reviewed-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/502670/ Link: https://lore.kernel.org/r/20220913085320.8577-5-johan+linaro@kernel.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/hdmi/hdmi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 28b33b35a30c..47796e12b432 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -293,6 +293,11 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, struct platform_device *pdev = hdmi->pdev; int ret; + if (priv->num_bridges == ARRAY_SIZE(priv->bridges)) { + DRM_DEV_ERROR(dev->dev, "too many bridges\n"); + return -ENOSPC; + } + hdmi->dev = dev; hdmi->encoder = encoder; From b686ffc0acb859f288535ce1a00b102ab8a66f7a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 13 Sep 2022 10:53:15 +0200 Subject: [PATCH 0493/1477] drm/msm/dp: fix IRQ lifetime commit a79343dcaba4b11adb57350e0b6426906a9b658e upstream. Device-managed resources allocated post component bind must be tied to the lifetime of the aggregate DRM device or they will not necessarily be released when binding of the aggregate device is deferred. This is specifically true for the DP IRQ, which will otherwise remain requested so that the next bind attempt fails when requesting the IRQ a second time. Since commit c3bf8e21b38a ("drm/msm/dp: Add eDP support via aux_bus") this can happen when the aux-bus panel driver has not yet been loaded so that probe is deferred. Fix this by tying the device-managed lifetime of the DP IRQ to the DRM device so that it is released when bind fails. 
Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Cc: stable@vger.kernel.org # 5.10 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Tested-by: Kuogee Hsieh Reviewed-by: Kuogee Hsieh Patchwork: https://patchwork.freedesktop.org/patch/502679/ Link: https://lore.kernel.org/r/20220913085320.8577-6-johan+linaro@kernel.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/dp/dp_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index a3de1d0523ea..5a152d505dfb 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -1201,7 +1201,7 @@ int dp_display_request_irq(struct msm_dp *dp_display) return -EINVAL; } - rc = devm_request_irq(&dp->pdev->dev, dp->irq, + rc = devm_request_irq(dp_display->drm_dev->dev, dp->irq, dp_display_irq_handler, IRQF_TRIGGER_HIGH, "dp_display_isr", dp); if (rc < 0) { From 5684808b269b1d05df7bca9d46baa32e31dc23dc Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 24 Oct 2022 11:02:59 -0700 Subject: [PATCH 0494/1477] mmc: sdhci_am654: 'select', not 'depends' REGMAP_MMIO commit 8d280b1df87e0b3d1355aeac7e62b62214b93f1c upstream. REGMAP_MMIO is not user-configurable, so we can only satisfy this dependency by enabling some other Kconfig symbol that properly 'select's it. Use select like everybody else. Noticed when trying to enable this driver for compile testing. 
Fixes: 59592cc1f593 ("mmc: sdhci_am654: Add dependency on MMC_SDHCI_AM654") Signed-off-by: Brian Norris Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221024180300.2292208-1-briannorris@chromium.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 30ff42fd173e..82e1fbd6b2ff 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -1079,9 +1079,10 @@ config MMC_SDHCI_OMAP config MMC_SDHCI_AM654 tristate "Support for the SDHCI Controller in TI's AM654 SOCs" - depends on MMC_SDHCI_PLTFM && OF && REGMAP_MMIO + depends on MMC_SDHCI_PLTFM && OF select MMC_SDHCI_IO_ACCESSORS select MMC_CQHCI + select REGMAP_MMIO help This selects the Secure Digital Host Controller Interface (SDHCI) support present in TI's AM654 SOCs. The controller supports From 8bf037279b5869ae9331c42bb1527d2680ebba96 Mon Sep 17 00:00:00 2001 From: Matthew Ma Date: Fri, 14 Oct 2022 11:49:51 +0800 Subject: [PATCH 0495/1477] mmc: core: Fix kernel panic when remove non-standard SDIO card commit 9972e6b404884adae9eec7463e30d9b3c9a70b18 upstream. SDIO tuple is only allocated for standard SDIO card, especially it causes memory corruption issues when the non-standard SDIO card has been removed, which is because the card device's reference counter does not increase for it at sdio_init_func(), but all SDIO card device reference counter gets decreased at sdio_release_func().
Fixes: 6f51be3d37df ("sdio: allow non-standard SDIO cards") Signed-off-by: Matthew Ma Reviewed-by: Weizhao Ouyang Reviewed-by: John Wang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221014034951.2300386-1-ouyangweizhao@zeku.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/sdio_bus.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index 3d709029e07c..a448535c1265 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -292,7 +292,8 @@ static void sdio_release_func(struct device *dev) { struct sdio_func *func = dev_to_sdio_func(dev); - sdio_free_func_cis(func); + if (!(func->card->quirks & MMC_QUIRK_NONSTD_SDIO)) + sdio_free_func_cis(func); kfree(func->info); kfree(func->tmpbuf); From 0bcd1ab3e8b3e897141e6e757a3ca00040bd49b8 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Tue, 18 Oct 2022 08:10:14 -0400 Subject: [PATCH 0496/1477] counter: microchip-tcb-capture: Handle Signal1 read and Synapse commit d917a62af81b133f35f627e7936e193c842a7947 upstream. The signal_read(), action_read(), and action_write() callbacks have been assuming Signal0 is requested without checking. This results in requests for Signal1 returning data for Signal0. This patch fixes these oversights by properly checking for the Signal's id in the respective callbacks and handling accordingly based on the particular Signal requested. The trig_inverted member of the mchp_tc_data is removed as superfluous. 
Fixes: 106b104137fd ("counter: Add microchip TCB capture counter") Cc: stable@vger.kernel.org Reviewed-by: Kamel Bouhara Link: https://lore.kernel.org/r/20221018121014.7368-1-william.gray@linaro.org/ Signed-off-by: William Breathitt Gray Signed-off-by: Greg Kroah-Hartman --- drivers/counter/microchip-tcb-capture.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c index 710acc0a3704..85fbbac06d31 100644 --- a/drivers/counter/microchip-tcb-capture.c +++ b/drivers/counter/microchip-tcb-capture.c @@ -29,7 +29,6 @@ struct mchp_tc_data { int qdec_mode; int num_channels; int channel[2]; - bool trig_inverted; }; enum mchp_tc_count_function { @@ -163,7 +162,7 @@ static int mchp_tc_count_signal_read(struct counter_device *counter, regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], SR), &sr); - if (priv->trig_inverted) + if (signal->id == 1) sigstatus = (sr & ATMEL_TC_MTIOB); else sigstatus = (sr & ATMEL_TC_MTIOA); @@ -181,6 +180,17 @@ static int mchp_tc_count_action_get(struct counter_device *counter, struct mchp_tc_data *const priv = counter->priv; u32 cmr; + if (priv->qdec_mode) { + *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES; + return 0; + } + + /* Only TIOA signal is evaluated in non-QDEC mode */ + if (synapse->signal->id != 0) { + *action = COUNTER_SYNAPSE_ACTION_NONE; + return 0; + } + regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], CMR), &cmr); switch (cmr & ATMEL_TC_ETRGEDG) { @@ -209,8 +219,8 @@ static int mchp_tc_count_action_set(struct counter_device *counter, struct mchp_tc_data *const priv = counter->priv; u32 edge = ATMEL_TC_ETRGEDG_NONE; - /* QDEC mode is rising edge only */ - if (priv->qdec_mode) + /* QDEC mode is rising edge only; only TIOA handled in non-QDEC mode */ + if (priv->qdec_mode || synapse->signal->id != 0) return -EINVAL; switch (action) { From 6f72a3977ba9d0e5491a5c01315204272e7f9c44 Mon Sep 17 00:00:00 2001 From: 
"Christian A. Ehrhardt" Date: Tue, 13 Sep 2022 14:17:23 +0200 Subject: [PATCH 0497/1477] kernfs: fix use-after-free in __kernfs_remove commit 4abc99652812a2ddf932f137515d5c5a04723538 upstream. Syzkaller managed to trigger concurrent calls to kernfs_remove_by_name_ns() for the same file resulting in a KASAN detected use-after-free. The race occurs when the root node is freed during kernfs_drain(). To prevent this acquire an additional reference for the root of the tree that is removed before calling __kernfs_remove(). Found by syzkaller with the following reproducer (slab_nomerge is required): syz_mount_image$ext4(0x0, &(0x7f0000000100)='./file0\x00', 0x100000, 0x0, 0x0, 0x0, 0x0) r0 = openat(0xffffffffffffff9c, &(0x7f0000000080)='/proc/self/exe\x00', 0x0, 0x0) close(r0) pipe2(&(0x7f0000000140)={0xffffffffffffffff, 0xffffffffffffffff}, 0x800) mount$9p_fd(0x0, &(0x7f0000000040)='./file0\x00', &(0x7f00000000c0), 0x408, &(0x7f0000000280)={'trans=fd,', {'rfdno', 0x3d, r0}, 0x2c, {'wfdno', 0x3d, r1}, 0x2c, {[{@cache_loose}, {@mmap}, {@loose}, {@loose}, {@mmap}], [{@mask={'mask', 0x3d, '^MAY_EXEC'}}, {@fsmagic={'fsmagic', 0x3d, 0x10001}}, {@dont_hash}]}}) Sample report: ================================================================== BUG: KASAN: use-after-free in kernfs_type include/linux/kernfs.h:335 [inline] BUG: KASAN: use-after-free in kernfs_leftmost_descendant fs/kernfs/dir.c:1261 [inline] BUG: KASAN: use-after-free in __kernfs_remove.part.0+0x843/0x960 fs/kernfs/dir.c:1369 Read of size 2 at addr ffff8880088807f0 by task syz-executor.2/857 CPU: 0 PID: 857 Comm: syz-executor.2 Not tainted 6.0.0-rc3-00363-g7726d4c3e60b #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x6e/0x91 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:317 [inline] print_report.cold+0x5e/0x5e5 mm/kasan/report.c:433 kasan_report+0xa3/0x130 mm/kasan/report.c:495 kernfs_type 
include/linux/kernfs.h:335 [inline] kernfs_leftmost_descendant fs/kernfs/dir.c:1261 [inline] __kernfs_remove.part.0+0x843/0x960 fs/kernfs/dir.c:1369 __kernfs_remove fs/kernfs/dir.c:1356 [inline] kernfs_remove_by_name_ns+0x108/0x190 fs/kernfs/dir.c:1589 sysfs_slab_add+0x133/0x1e0 mm/slub.c:5943 __kmem_cache_create+0x3e0/0x550 mm/slub.c:4899 create_cache mm/slab_common.c:229 [inline] kmem_cache_create_usercopy+0x167/0x2a0 mm/slab_common.c:335 p9_client_create+0xd4d/0x1190 net/9p/client.c:993 v9fs_session_init+0x1e6/0x13c0 fs/9p/v9fs.c:408 v9fs_mount+0xb9/0xbd0 fs/9p/vfs_super.c:126 legacy_get_tree+0xf1/0x200 fs/fs_context.c:610 vfs_get_tree+0x85/0x2e0 fs/super.c:1530 do_new_mount fs/namespace.c:3040 [inline] path_mount+0x675/0x1d00 fs/namespace.c:3370 do_mount fs/namespace.c:3383 [inline] __do_sys_mount fs/namespace.c:3591 [inline] __se_sys_mount fs/namespace.c:3568 [inline] __x64_sys_mount+0x282/0x300 fs/namespace.c:3568 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f725f983aed Code: 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f725f0f7028 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 00007f725faa3f80 RCX: 00007f725f983aed RDX: 00000000200000c0 RSI: 0000000020000040 RDI: 0000000000000000 RBP: 00007f725f9f419c R08: 0000000020000280 R09: 0000000000000000 R10: 0000000000000408 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000006 R14: 00007f725faa3f80 R15: 00007f725f0d7000 Allocated by task 855: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:45 [inline] set_alloc_info mm/kasan/common.c:437 [inline] __kasan_slab_alloc+0x66/0x80 mm/kasan/common.c:470 kasan_slab_alloc include/linux/kasan.h:224 [inline] slab_post_alloc_hook 
mm/slab.h:727 [inline] slab_alloc_node mm/slub.c:3243 [inline] slab_alloc mm/slub.c:3251 [inline] __kmem_cache_alloc_lru mm/slub.c:3258 [inline] kmem_cache_alloc+0xbf/0x200 mm/slub.c:3268 kmem_cache_zalloc include/linux/slab.h:723 [inline] __kernfs_new_node+0xd4/0x680 fs/kernfs/dir.c:593 kernfs_new_node fs/kernfs/dir.c:655 [inline] kernfs_create_dir_ns+0x9c/0x220 fs/kernfs/dir.c:1010 sysfs_create_dir_ns+0x127/0x290 fs/sysfs/dir.c:59 create_dir lib/kobject.c:63 [inline] kobject_add_internal+0x24a/0x8d0 lib/kobject.c:223 kobject_add_varg lib/kobject.c:358 [inline] kobject_init_and_add+0x101/0x160 lib/kobject.c:441 sysfs_slab_add+0x156/0x1e0 mm/slub.c:5954 __kmem_cache_create+0x3e0/0x550 mm/slub.c:4899 create_cache mm/slab_common.c:229 [inline] kmem_cache_create_usercopy+0x167/0x2a0 mm/slab_common.c:335 p9_client_create+0xd4d/0x1190 net/9p/client.c:993 v9fs_session_init+0x1e6/0x13c0 fs/9p/v9fs.c:408 v9fs_mount+0xb9/0xbd0 fs/9p/vfs_super.c:126 legacy_get_tree+0xf1/0x200 fs/fs_context.c:610 vfs_get_tree+0x85/0x2e0 fs/super.c:1530 do_new_mount fs/namespace.c:3040 [inline] path_mount+0x675/0x1d00 fs/namespace.c:3370 do_mount fs/namespace.c:3383 [inline] __do_sys_mount fs/namespace.c:3591 [inline] __se_sys_mount fs/namespace.c:3568 [inline] __x64_sys_mount+0x282/0x300 fs/namespace.c:3568 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 857: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:45 kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:367 [inline] ____kasan_slab_free mm/kasan/common.c:329 [inline] __kasan_slab_free+0x108/0x190 mm/kasan/common.c:375 kasan_slab_free include/linux/kasan.h:200 [inline] slab_free_hook mm/slub.c:1754 [inline] slab_free_freelist_hook mm/slub.c:1780 [inline] slab_free mm/slub.c:3534 [inline] kmem_cache_free+0x9c/0x340 mm/slub.c:3551 
kernfs_put.part.0+0x2b2/0x520 fs/kernfs/dir.c:547 kernfs_put+0x42/0x50 fs/kernfs/dir.c:521 __kernfs_remove.part.0+0x72d/0x960 fs/kernfs/dir.c:1407 __kernfs_remove fs/kernfs/dir.c:1356 [inline] kernfs_remove_by_name_ns+0x108/0x190 fs/kernfs/dir.c:1589 sysfs_slab_add+0x133/0x1e0 mm/slub.c:5943 __kmem_cache_create+0x3e0/0x550 mm/slub.c:4899 create_cache mm/slab_common.c:229 [inline] kmem_cache_create_usercopy+0x167/0x2a0 mm/slab_common.c:335 p9_client_create+0xd4d/0x1190 net/9p/client.c:993 v9fs_session_init+0x1e6/0x13c0 fs/9p/v9fs.c:408 v9fs_mount+0xb9/0xbd0 fs/9p/vfs_super.c:126 legacy_get_tree+0xf1/0x200 fs/fs_context.c:610 vfs_get_tree+0x85/0x2e0 fs/super.c:1530 do_new_mount fs/namespace.c:3040 [inline] path_mount+0x675/0x1d00 fs/namespace.c:3370 do_mount fs/namespace.c:3383 [inline] __do_sys_mount fs/namespace.c:3591 [inline] __se_sys_mount fs/namespace.c:3568 [inline] __x64_sys_mount+0x282/0x300 fs/namespace.c:3568 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The buggy address belongs to the object at ffff888008880780 which belongs to the cache kernfs_node_cache of size 128 The buggy address is located 112 bytes inside of 128-byte region [ffff888008880780, ffff888008880800) The buggy address belongs to the physical page: page:00000000732833f8 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x8880 flags: 0x100000000000200(slab|node=0|zone=1) raw: 0100000000000200 0000000000000000 dead000000000122 ffff888001147280 raw: 0000000000000000 0000000000150015 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888008880680: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb ffff888008880700: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc >ffff888008880780: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888008880800: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb ffff888008880880: fb 
fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ================================================================== Acked-by: Tejun Heo Cc: stable # -rc3 Signed-off-by: Christian A. Ehrhardt Link: https://lore.kernel.org/r/20220913121723.691454-1-lk@c--e.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index afb39e1bbe3b..8b3c86a502da 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1519,8 +1519,11 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, mutex_lock(&kernfs_mutex); kn = kernfs_find_ns(parent, name, ns); - if (kn) + if (kn) { + kernfs_get(kn); __kernfs_remove(kn); + kernfs_put(kn); + } mutex_unlock(&kernfs_mutex); From 449070996ce6e5b1fc3b66538f04ab37c2c0591f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 26 Oct 2022 10:27:36 +0300 Subject: [PATCH 0498/1477] perf auxtrace: Fix address filter symbol name match for modules commit cba04f3136b658583adb191556f99d087589c1cc upstream. For modules, names from kallsyms__parse() contain the module name which meant that module symbols did not match exactly by name. Fix by matching the name string up to the separating tab character. 
Fixes: 1b36c03e356936d6 ("perf record: Add support for using symbols in address filters") Signed-off-by: Adrian Hunter Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221026072736.2982-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/auxtrace.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index d3c15b53495d..d96e86ddd2c5 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2164,11 +2164,19 @@ struct sym_args { bool near; }; +static bool kern_sym_name_match(const char *kname, const char *name) +{ + size_t n = strlen(name); + + return !strcmp(kname, name) || + (!strncmp(kname, name, n) && kname[n] == '\t'); +} + static bool kern_sym_match(struct sym_args *args, const char *name, char type) { /* A function with the same name, and global or the n'th found or any */ return kallsyms__is_function(type) && - !strcmp(name, args->name) && + kern_sym_name_match(name, args->name) && ((args->global && isupper(type)) || (args->selected && ++(args->cnt) == args->idx) || (!args->global && !args->selected)); From fe187c801a44d9a6a796878f84a5ae92ed70bb7e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 18 Oct 2022 13:44:11 +0200 Subject: [PATCH 0499/1477] s390/futex: add missing EX_TABLE entry to __futex_atomic_op() commit a262d3ad6a433e4080cecd0a8841104a5906355e upstream. For some exception types the instruction address points behind the instruction that caused the exception. Take that into account and add the missing exception table entry. 
Cc: Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/futex.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 26f9144562c9..e1d0b2aaaddd 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -16,7 +16,8 @@ "3: jl 1b\n" \ " lhi %0,0\n" \ "4: sacf 768\n" \ - EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ + EX_TABLE(0b,4b) EX_TABLE(1b,4b) \ + EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ "=m" (*uaddr) \ : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ From 467230b9ef40526a3b0d0fc4587c2384f17e5a3b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 18 Oct 2022 13:48:34 +0200 Subject: [PATCH 0500/1477] s390/pci: add missing EX_TABLE entries to __pcistg_mio_inuser()/__pcilg_mio_inuser() commit 6ec803025cf3173a57222e4411097166bd06fa98 upstream. For some exception types the instruction address points behind the instruction that caused the exception. Take that into account and add the missing exception table entry. 
Cc: Fixes: f058599e22d5 ("s390/pci: Fix s390_mmio_read/write with MIO") Reviewed-by: Niklas Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/pci/pci_mmio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 37b1bbd1a27c..1ec8076209ca 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -64,7 +64,7 @@ static inline int __pcistg_mio_inuser( asm volatile ( " sacf 256\n" "0: llgc %[tmp],0(%[src])\n" - " sllg %[val],%[val],8\n" + "4: sllg %[val],%[val],8\n" " aghi %[src],1\n" " ogr %[val],%[tmp]\n" " brctg %[cnt],0b\n" @@ -72,7 +72,7 @@ static inline int __pcistg_mio_inuser( "2: ipm %[cc]\n" " srl %[cc],28\n" "3: sacf 768\n" - EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b) + EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b) : [src] "+a" (src), [cnt] "+d" (cnt), [val] "+d" (val), [tmp] "=d" (tmp), @@ -222,10 +222,10 @@ static inline int __pcilg_mio_inuser( "2: ahi %[shift],-8\n" " srlg %[tmp],%[val],0(%[shift])\n" "3: stc %[tmp],0(%[dst])\n" - " aghi %[dst],1\n" + "5: aghi %[dst],1\n" " brctg %[cnt],2b\n" "4: sacf 768\n" - EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) + EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b) : [cc] "+d" (cc), [val] "=d" (val), [len] "+d" (len), [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp), From a3f2cc11d6b6cd25edce81fb5b15dfcfd15f82e7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 17 Sep 2021 08:13:08 +0200 Subject: [PATCH 0501/1477] Xen/gntdev: don't ignore kernel unmapping error commit f28347cc66395e96712f5c2db0a302ee75bafce6 upstream. While working on XSA-361 and its follow-ups, I failed to spot another place where the kernel mapping part of an operation was not treated the same as the user space part. Detect and propagate errors and add a 2nd pr_debug(). 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/c2513395-74dc-aea3-9192-fd265aa44e35@suse.com Signed-off-by: Juergen Gross Signed-off-by: Demi Marie Obenour Co-authored-by: Demi Marie Obenour Signed-off-by: Greg Kroah-Hartman --- drivers/xen/gntdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index ff195b571763..1578fd2186b3 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -395,6 +395,14 @@ static void __unmap_grant_pages_done(int result, map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = -1; + if (use_ptemod) { + WARN_ON(map->kunmap_ops[offset+i].status && + map->kunmap_ops[offset+i].handle != -1); + pr_debug("kunmap handle=%u st=%d\n", + map->kunmap_ops[offset+i].handle, + map->kunmap_ops[offset+i].status); + map->kunmap_ops[offset+i].handle = -1; + } } /* * Decrease the live-grant counter. This must happen after the loop to From 49db6cb81400ba863e1a85e55fcdf1031807c23f Mon Sep 17 00:00:00 2001 From: "M. Vefa Bicakci" Date: Sun, 2 Oct 2022 18:20:05 -0400 Subject: [PATCH 0502/1477] xen/gntdev: Prevent leaking grants commit 0991028cd49567d7016d1b224fe0117c35059f86 upstream. Prior to this commit, if a grant mapping operation failed partially, some of the entries in the map_ops array would be invalid, whereas all of the entries in the kmap_ops array would be valid. This in turn would cause the following logic in gntdev_map_grant_pages to become invalid: for (i = 0; i < map->count; i++) { if (map->map_ops[i].status == GNTST_okay) { map->unmap_ops[i].handle = map->map_ops[i].handle; if (!use_ptemod) alloced++; } if (use_ptemod) { if (map->kmap_ops[i].status == GNTST_okay) { if (map->map_ops[i].status == GNTST_okay) alloced++; map->kunmap_ops[i].handle = map->kmap_ops[i].handle; } } } ... 
atomic_add(alloced, &map->live_grants); Assume that use_ptemod is true (i.e., the domain mapping the granted pages is a paravirtualized domain). In the code excerpt above, note that the "alloced" variable is only incremented when both kmap_ops[i].status and map_ops[i].status are set to GNTST_okay (i.e., both mapping operations are successful). However, as also noted above, there are cases where a grant mapping operation fails partially, breaking the assumption of the code excerpt above. The aforementioned causes map->live_grants to be incorrectly set. In some cases, all of the map_ops mappings fail, but all of the kmap_ops mappings succeed, meaning that live_grants may remain zero. This in turn makes it impossible to unmap the successfully grant-mapped pages pointed to by kmap_ops, because unmap_grant_pages has the following snippet of code at its beginning: if (atomic_read(&map->live_grants) == 0) return; /* Nothing to do */ In other cases where only some of the map_ops mappings fail but all kmap_ops mappings succeed, live_grants is made positive, but when the user requests unmapping the grant-mapped pages, __unmap_grant_pages_done will then make map->live_grants negative, because the latter function does not check if all of the pages that were requested to be unmapped were actually unmapped, and the same function unconditionally subtracts "data->count" (i.e., a value that can be greater than map->live_grants) from map->live_grants. The side effects of a negative live_grants value have not been studied. The net effect of all of this is that grant references are leaked in one of the above conditions. In Qubes OS v4.1 (which uses Xen's grant mechanism extensively for X11 GUI isolation), this issue manifests itself with warning messages like the following to be printed out by the Linux kernel in the VM that had granted pages (that contain X11 GUI window data) to dom0: "g.e. 
0x1234 still pending", especially after the user rapidly resizes GUI VM windows (causing some grant-mapping operations to partially or completely fail, due to the fact that the VM unshares some of the pages as part of the window resizing, making the pages impossible to grant-map from dom0). The fix for this issue involves counting all successful map_ops and kmap_ops mappings separately, and then adding the sum to live_grants. During unmapping, only the number of successfully unmapped grants is subtracted from live_grants. The code is also modified to check for negative live_grants values after the subtraction and warn the user. Link: https://github.com/QubesOS/qubes-issues/issues/7631 Fixes: dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()") Cc: stable@vger.kernel.org Signed-off-by: M. Vefa Bicakci Acked-by: Demi Marie Obenour Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20221002222006.2077-2-m.v.b@runbox.com Signed-off-by: Juergen Gross Signed-off-by: Demi Marie Obenour Signed-off-by: Greg Kroah-Hartman --- drivers/xen/gntdev.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1578fd2186b3..16acddaff9ae 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -360,8 +360,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) for (i = 0; i < map->count; i++) { if (map->map_ops[i].status == GNTST_okay) { map->unmap_ops[i].handle = map->map_ops[i].handle; - if (!use_ptemod) - alloced++; + alloced++; } else if (!err) err = -EINVAL; @@ -370,8 +369,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) if (use_ptemod) { if (map->kmap_ops[i].status == GNTST_okay) { - if (map->map_ops[i].status == GNTST_okay) - alloced++; + alloced++; map->kunmap_ops[i].handle = map->kmap_ops[i].handle; } else if (!err) err = -EINVAL; @@ -387,8 +385,14 @@ static void __unmap_grant_pages_done(int result, unsigned int i; struct gntdev_grant_map *map = 
data->data; unsigned int offset = data->unmap_ops - map->unmap_ops; + int successful_unmaps = 0; + int live_grants; for (i = 0; i < data->count; i++) { + if (map->unmap_ops[offset + i].status == GNTST_okay && + map->unmap_ops[offset + i].handle != -1) + successful_unmaps++; + WARN_ON(map->unmap_ops[offset+i].status && map->unmap_ops[offset+i].handle != -1); pr_debug("unmap handle=%d st=%d\n", @@ -396,6 +400,10 @@ static void __unmap_grant_pages_done(int result, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = -1; if (use_ptemod) { + if (map->kunmap_ops[offset + i].status == GNTST_okay && + map->kunmap_ops[offset + i].handle != -1) + successful_unmaps++; + WARN_ON(map->kunmap_ops[offset+i].status && map->kunmap_ops[offset+i].handle != -1); pr_debug("kunmap handle=%u st=%d\n", @@ -404,11 +412,15 @@ static void __unmap_grant_pages_done(int result, map->kunmap_ops[offset+i].handle = -1; } } + /* * Decrease the live-grant counter. This must happen after the loop to * prevent premature reuse of the grants by gnttab_mmap(). */ - atomic_sub(data->count, &map->live_grants); + live_grants = atomic_sub_return(successful_unmaps, &map->live_grants); + if (WARN_ON(live_grants < 0)) + pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n", + __func__, live_grants, successful_unmaps); /* Release reference taken by __unmap_grant_pages */ gntdev_put_map(NULL, map); From 935a8b6202101d7f58fe9cd11287f9cec0d8dd32 Mon Sep 17 00:00:00 2001 From: Yuanzheng Song Date: Fri, 28 Oct 2022 03:07:05 +0000 Subject: [PATCH 0503/1477] mm/memory: add non-anonymous page check in the copy_present_page() The vma->anon_vma of the child process may be NULL because the entire vma does not contain anonymous pages. In this case, a BUG will occur when the copy_present_page() passes a copy of a non-anonymous page of that vma to the page_add_new_anon_rmap() to set up new anonymous rmap. ------------[ cut here ]------------ kernel BUG at mm/rmap.c:1044! 
Internal error: Oops - BUG: 0 [#1] SMP Modules linked in: CPU: 2 PID: 3617 Comm: test Not tainted 5.10.149 #1 Hardware name: linux,dummy-virt (DT) pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--) pc : __page_set_anon_rmap+0xbc/0xf8 lr : __page_set_anon_rmap+0xbc/0xf8 sp : ffff800014c1b870 x29: ffff800014c1b870 x28: 0000000000000001 x27: 0000000010100073 x26: ffff1d65c517baa8 x25: ffff1d65cab0f000 x24: ffff1d65c416d800 x23: ffff1d65cab5f248 x22: 0000000020000000 x21: 0000000000000001 x20: 0000000000000000 x19: fffffe75970023c0 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : ffffc3096d5fb858 x8 : 0000000000000000 x7 : 0000000000000011 x6 : ffff5a5c9089c000 x5 : 0000000000020000 x4 : ffff5a5c9089c000 x3 : ffffc3096d200000 x2 : ffffc3096e8d0000 x1 : ffff1d65ca3da740 x0 : 0000000000000000 Call trace: __page_set_anon_rmap+0xbc/0xf8 page_add_new_anon_rmap+0x1e0/0x390 copy_pte_range+0xd00/0x1248 copy_page_range+0x39c/0x620 dup_mmap+0x2e0/0x5a8 dup_mm+0x78/0x140 copy_process+0x918/0x1a20 kernel_clone+0xac/0x638 __do_sys_clone+0x78/0xb0 __arm64_sys_clone+0x30/0x40 el0_svc_common.constprop.0+0xb0/0x308 do_el0_svc+0x48/0xb8 el0_svc+0x24/0x38 el0_sync_handler+0x160/0x168 el0_sync+0x180/0x1c0 Code: 97f8ff85 f9400294 17ffffeb 97f8ff82 (d4210000) ---[ end trace a972347688dc9bd4 ]--- Kernel panic - not syncing: Oops - BUG: Fatal exception SMP: stopping secondary CPUs Kernel Offset: 0x43095d200000 from 0xffff800010000000 PHYS_OFFSET: 0xffffe29a80000000 CPU features: 0x08200022,61806082 Memory Limit: none ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]--- This problem has been fixed by the commit ("mm/rmap: split page_dup_rmap() into page_dup_file_rmap() and page_try_dup_anon_rmap()"), but still exists in the linux-5.10.y branch. 
This patch is not applicable to this version because of the large version differences. Therefore, fix it by adding non-anonymous page check in the copy_present_page(). Cc: stable@vger.kernel.org Fixes: 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes") Signed-off-by: Yuanzheng Song Acked-by: Peter Xu Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index cc50fa0f4590..cbc0a163d705 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -823,6 +823,17 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma if (likely(!page_maybe_dma_pinned(page))) return 1; + /* + * The vma->anon_vma of the child process may be NULL + * because the entire vma does not contain anonymous pages. + * A BUG will occur when the copy_present_page() passes + * a copy of a non-anonymous page of that vma to the + * page_add_new_anon_rmap() to set up new anonymous rmap. + * Return 1 if the page is not an anonymous page. + */ + if (!PageAnon(page)) + return 1; + new_page = *prealloc; if (!new_page) return -EAGAIN; From 568e3812b1778b4c0c229649b59977d88f400ece Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 17 Oct 2022 20:25:05 -0400 Subject: [PATCH 0504/1477] mm,hugetlb: take hugetlb_lock before decrementing h->resv_huge_pages commit 12df140f0bdfae5dcfc81800970dd7f6f632e00c upstream. The h->*_huge_pages counters are protected by the hugetlb_lock, but alloc_huge_page has a corner case where it can decrement the counter outside of the lock. This could lead to a corrupted value of h->resv_huge_pages, which we have observed on our systems. Take the hugetlb_lock before decrementing h->resv_huge_pages to avoid a potential race. 
Link: https://lkml.kernel.org/r/20221017202505.0e6a4fcd@imladris.surriel.com Fixes: a88c76954804 ("mm: hugetlb: fix hugepage memory leak caused by wrong reserve count") Signed-off-by: Rik van Riel Reviewed-by: Mike Kravetz Cc: Naoya Horiguchi Cc: Glen McCready Cc: Mike Kravetz Cc: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Mike Kravetz Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c57c165bfbbc..d8c63d79af20 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2387,11 +2387,11 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, page = alloc_buddy_huge_page_with_mpol(h, vma, addr); if (!page) goto out_uncharge_cgroup; + spin_lock(&hugetlb_lock); if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) { SetPagePrivate(page); h->resv_huge_pages--; } - spin_lock(&hugetlb_lock); list_add(&page->lru, &h->hugepage_activelist); /* Fall through */ } From 9d00384270b15ed19c3987effe826b061b567171 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 19 Sep 2022 16:08:30 +0000 Subject: [PATCH 0505/1477] net: ieee802154: fix error return code in dgram_bind() commit 444d8ad4916edec8a9fc684e841287db9b1e999f upstream. Fix to return error code -EINVAL from the error handling case instead of 0, as done elsewhere in this function. 
Fixes: 94160108a70c ("net/ieee802154: fix uninit value bug in dgram_sendmsg") Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20220919160830.1436109-1-weiyongjun@huaweicloud.com Signed-off-by: Stefan Schmidt Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/socket.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index ecc0d5fbde04..d4c275e56d82 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -503,8 +503,10 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) if (err < 0) goto out; - if (addr->family != AF_IEEE802154) + if (addr->family != AF_IEEE802154) { + err = -EINVAL; goto out; + } ieee802154_addr_from_sa(&haddr, &addr->addr); dev = ieee802154_get_dev(sock_net(sk), &haddr); From 4953a989b72d2b809b18dde7a4c2844cba4232d4 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 22 Jul 2022 09:11:31 +0200 Subject: [PATCH 0506/1477] media: v4l2: Fix v4l2_i2c_subdev_set_name function documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bb9ea2c31fa11b789ade4c3abcdda3c5370a76ab ] The doc says the I²C device's name is used if devname is NULL, but actually the I²C device driver's name is used. 
Fixes: 0658293012af ("media: v4l: subdev: Add a function to set an I²C sub-device's name") Signed-off-by: Alexander Stein Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- include/media/v4l2-common.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index a3083529b698..2e53ee1c8db4 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -175,7 +175,8 @@ struct v4l2_subdev *v4l2_i2c_new_subdev_board(struct v4l2_device *v4l2_dev, * * @sd: pointer to &struct v4l2_subdev * @client: pointer to struct i2c_client - * @devname: the name of the device; if NULL, the I²C device's name will be used + * @devname: the name of the device; if NULL, the I²C device drivers's name + * will be used * @postfix: sub-device specific string to put right after the I²C device name; * may be NULL */ From 6571f6ca8a218dc978dec7b6b71ad654ff79e9ad Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Tue, 13 Sep 2022 13:55:48 -0700 Subject: [PATCH 0507/1477] drm/msm: Fix return type of mdp4_lvds_connector_mode_valid [ Upstream commit 0b33a33bd15d5bab73b87152b220a8d0153a4587 ] The mode_valid field in drm_connector_helper_funcs is expected to be of type: enum drm_mode_status (* mode_valid) (struct drm_connector *connector, struct drm_display_mode *mode); The mismatched return type breaks forward edge kCFI since the underlying function definition does not match the function hook definition. The return type of mdp4_lvds_connector_mode_valid should be changed from int to enum drm_mode_status. 
Reported-by: Dan Carpenter Link: https://github.com/ClangBuiltLinux/linux/issues/1703 Cc: llvm@lists.linux.dev Signed-off-by: Nathan Huckleberry Fixes: 3e87599b68e7 ("drm/msm/mdp4: add LVDS panel support") Reviewed-by: Abhinav Kumar Reviewed-by: Nathan Chancellor Patchwork: https://patchwork.freedesktop.org/patch/502878/ Link: https://lore.kernel.org/r/20220913205551.155128-1-nhuck@google.com Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c index 7288041dd86a..7444b75c4215 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c @@ -56,8 +56,9 @@ static int mdp4_lvds_connector_get_modes(struct drm_connector *connector) return ret; } -static int mdp4_lvds_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +mdp4_lvds_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { struct mdp4_lvds_connector *mdp4_lvds_connector = to_mdp4_lvds_connector(connector); From 648ac633e7645aca21af4f8caf0bddd5b792746d Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Thu, 13 Oct 2022 10:38:31 +0530 Subject: [PATCH 0508/1477] ASoC: qcom: lpass-cpu: mark HDMI TX registers as volatile [ Upstream commit c9a3545b1d771fb7b06a487796c40288c02c41c5 ] Update HDMI volatile registers list as DMA, Channel Selection registers, vbit control registers are being reflected by hardware DP port disconnection. This update is required to fix no display and no sound issue observed after reconnecting TAMA/SANWA DP cables. 
Once the DP cable is unplugged, the DMA control registers are reset by hardware; however, at the second plug-in the new DMA control values are not written to the DMA hardware registers, since the new register values and the values cached at the time of the first plug-in are the same. Fixes: 7cb37b7bd0d3 ("ASoC: qcom: Add support for lpass hdmi driver") Signed-off-by: Srinivasa Rao Mandadapu Reported-by: Kuogee Hsieh Link: https://lore.kernel.org/r/1665637711-13300-1-git-send-email-quic_srivasam@quicinc.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/qcom/lpass-cpu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index 03abb3d719d0..9aa7c445b6b0 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -745,10 +745,18 @@ static bool lpass_hdmi_regmap_volatile(struct device *dev, unsigned int reg) return true; if (reg == LPASS_HDMI_TX_LEGACY_ADDR(v)) return true; + if (reg == LPASS_HDMI_TX_VBIT_CTL_ADDR(v)) + return true; for (i = 0; i < v->hdmi_rdma_channels; ++i) { if (reg == LPAIF_HDMI_RDMACURR_REG(v, i)) return true; + if (reg == LPASS_HDMI_TX_DMA_ADDR(v, i)) + return true; + if (reg == LPASS_HDMI_TX_CH_LSB_ADDR(v, i)) + return true; + if (reg == LPASS_HDMI_TX_CH_MSB_ADDR(v, i)) + return true; } return false; } From a5275572995601d8f20277a81a0869b7276afd61 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 9 Oct 2022 19:28:46 -0700 Subject: [PATCH 0509/1477] arc: iounmap() arg is volatile [ Upstream commit c44f15c1c09481d50fd33478ebb5b8284f8f5edb ] Add 'volatile' to iounmap()'s argument to prevent build warnings. This makes it the same as other major architectures.
Placates these warnings: (12 such warnings) ../drivers/video/fbdev/riva/fbdev.c: In function 'rivafb_probe': ../drivers/video/fbdev/riva/fbdev.c:2067:42: error: passing argument 1 of 'iounmap' discards 'volatile' qualifier from pointer target type [-Werror=discarded-qualifiers] 2067 | iounmap(default_par->riva.PRAMIN); Fixes: 1162b0701b14b ("ARC: I/O and DMA Mappings") Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Arnd Bergmann Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/include/asm/io.h | 2 +- arch/arc/mm/ioremap.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index 8f777d6441a5..80347382a380 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -32,7 +32,7 @@ static inline void ioport_unmap(void __iomem *addr) { } -extern void iounmap(const void __iomem *addr); +extern void iounmap(const volatile void __iomem *addr); /* * io{read,write}{16,32}be() macros diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index 95c649fbc95a..d3b1ea16e9cd 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -93,7 +93,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, EXPORT_SYMBOL(ioremap_prot); -void iounmap(const void __iomem *addr) +void iounmap(const volatile void __iomem *addr) { /* weird double cast to handle phys_addr_t > 32 bits */ if (arc_uncached_addr_space((phys_addr_t)(u32)addr)) From 2663b16c76d0e8a5ca20d28b5d85ac6993c3954c Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Sat, 15 Oct 2022 14:48:50 +0530 Subject: [PATCH 0510/1477] ASoC: qcom: lpass-cpu: Mark HDMI TX parity register as volatile [ Upstream commit 1dd5166102e7ca91e8c5d833110333835e147ddb ] Update LPASS_HDMI_TX_PARITY_ADDR register as volatile, to fix dp audio failures observed with some of external monitors. 
Fixes: 7cb37b7bd0d3 ("ASoC: qcom: Add support for lpass hdmi driver") Signed-off-by: Srinivasa Rao Mandadapu Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1665825530-7593-1-git-send-email-quic_srivasam@quicinc.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/qcom/lpass-cpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index 9aa7c445b6b0..ecd6c049ace2 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -747,6 +747,8 @@ static bool lpass_hdmi_regmap_volatile(struct device *dev, unsigned int reg) return true; if (reg == LPASS_HDMI_TX_VBIT_CTL_ADDR(v)) return true; + if (reg == LPASS_HDMI_TX_PARITY_ADDR(v)) + return true; for (i = 0; i < v->hdmi_rdma_channels; ++i) { if (reg == LPAIF_HDMI_RDMACURR_REG(v, i)) From bfce73088682ef0770da951f51156c36a89be490 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 19 Oct 2022 17:30:25 +0800 Subject: [PATCH 0511/1477] ALSA: ac97: fix possible memory leak in snd_ac97_dev_register() [ Upstream commit 4881bda5ea05c8c240fc8afeaa928e2bc43f61fa ] If device_register() fails in snd_ac97_dev_register(), it should call put_device() to give up reference, or the name allocated in dev_set_name() is leaked. 
Fixes: 0ca06a00e206 ("[ALSA] AC97 bus interface for ad-hoc drivers") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221019093025.1179475-1-yangyingliang@huawei.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/ac97/ac97_codec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 963731cf0d8c..cd66632bf1c3 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -1946,6 +1946,7 @@ static int snd_ac97_dev_register(struct snd_device *device) snd_ac97_get_short_name(ac97)); if ((err = device_register(&ac97->dev)) < 0) { ac97_err(ac97, "Can't register ac97 bus\n"); + put_device(&ac97->dev); ac97->dev.bus = NULL; return err; } From fb94152aae8859335655b89e98685db742a2dd09 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 18 Jul 2022 17:11:19 +0300 Subject: [PATCH 0512/1477] perf/x86/intel/lbr: Use setup_clear_cpu_cap() instead of clear_cpu_cap() [ Upstream commit b329f5ddc9ce4b622d9c7aaf5c6df4de52caf91a ] clear_cpu_cap(&boot_cpu_data) is very similar to setup_clear_cpu_cap() except that the latter also sets a bit in 'cpu_caps_cleared' which later clears the same cap in secondary cpus, which is likely what is meant here. 
Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR") Signed-off-by: Maxim Levitsky Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lkml.kernel.org/r/20220718141123.136106-2-mlevitsk@redhat.com Signed-off-by: Sasha Levin --- arch/x86/events/intel/lbr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 42173a7be3bb..4b6c39c5facb 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1847,7 +1847,7 @@ void __init intel_pmu_arch_lbr_init(void) return; clear_arch_lbr: - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_ARCH_LBR); + setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR); } /** From 32a3d4660b34ce49ac0162338ebe362098e2f5df Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 18 Oct 2022 15:19:50 -0400 Subject: [PATCH 0513/1477] tipc: fix a null-ptr-deref in tipc_topsrv_accept [ Upstream commit 82cb4e4612c633a9ce320e1773114875604a3cce ] syzbot found a crash in tipc_topsrv_accept: KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] Workqueue: tipc_rcv tipc_topsrv_accept RIP: 0010:kernel_accept+0x22d/0x350 net/socket.c:3487 Call Trace: tipc_topsrv_accept+0x197/0x280 net/tipc/topsrv.c:460 process_one_work+0x991/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e4/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 It was caused by srv->listener that might be set to null by tipc_topsrv_stop() in net .exit whereas it's still used in tipc_topsrv_accept() worker. srv->listener is protected by srv->idr_lock in tipc_topsrv_stop(), so add a check for srv->listener under srv->idr_lock in tipc_topsrv_accept() to avoid the null-ptr-deref. To ensure the lsock is not released while tipc_topsrv_accept() is running, move sock_release() after tipc_topsrv_work_stop(), which waits until the tipc_topsrv_accept worker is done.
Note that sk_callback_lock is used to protect sk->sk_user_data instead of srv->listener, and it should check srv in tipc_topsrv_listener_data_ready() instead. This also ensures that no more tipc_topsrv_accept worker will be started after tipc_conn_close() is called in tipc_topsrv_stop() where it sets sk->sk_user_data to null. Fixes: 0ef897be12b8 ("tipc: separate topology server listener socket from subcsriber sockets") Reported-by: syzbot+c5ce866a8d30f4be0651@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Jon Maloy Link: https://lore.kernel.org/r/4eee264380c409c61c6451af1059b7fb271a7e7b.1666120790.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tipc/topsrv.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index d9e2c0fea3f2..561e709ae06a 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -450,12 +450,19 @@ static void tipc_conn_data_ready(struct sock *sk) static void tipc_topsrv_accept(struct work_struct *work) { struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork); - struct socket *lsock = srv->listener; - struct socket *newsock; + struct socket *newsock, *lsock; struct tipc_conn *con; struct sock *newsk; int ret; + spin_lock_bh(&srv->idr_lock); + if (!srv->listener) { + spin_unlock_bh(&srv->idr_lock); + return; + } + lsock = srv->listener; + spin_unlock_bh(&srv->idr_lock); + while (1) { ret = kernel_accept(lsock, &newsock, O_NONBLOCK); if (ret < 0) @@ -489,7 +496,7 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk) read_lock_bh(&sk->sk_callback_lock); srv = sk->sk_user_data; - if (srv->listener) + if (srv) queue_work(srv->rcv_wq, &srv->awork); read_unlock_bh(&sk->sk_callback_lock); } @@ -699,8 +706,9 @@ static void tipc_topsrv_stop(struct net *net) __module_get(lsock->sk->sk_prot_creator->owner); srv->listener = NULL; spin_unlock_bh(&srv->idr_lock); - sock_release(lsock); + 
tipc_topsrv_work_stop(srv); + sock_release(lsock); idr_destroy(&srv->conn_idr); kfree(srv); } From 62f0a08e82a6312efd7df7f595c0b11d4ffde610 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 19 Oct 2022 14:41:04 +0800 Subject: [PATCH 0514/1477] net: netsec: fix error handling in netsec_register_mdio() [ Upstream commit 94423589689124e8cd145b38a1034be7f25835b2 ] If phy_device_register() fails, phy_device_free() needs to be called to put refcount, so memory of phy device and device name can be freed in callback function. If get_phy_device() fails, mdiobus_unregister() needs to be called, or it will cause a warning in mdiobus_free() and kobject is leaked. Fixes: 533dd11a12f6 ("net: socionext: Add Synquacer NetSec driver") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221019064104.3228892-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/socionext/netsec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index ef3634d1b9f7..b9acee214bb6 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1958,11 +1958,13 @@ static int netsec_register_mdio(struct netsec_priv *priv, u32 phy_addr) ret = PTR_ERR(priv->phydev); dev_err(priv->dev, "get_phy_device err(%d)\n", ret); priv->phydev = NULL; + mdiobus_unregister(bus); return -ENODEV; } ret = phy_device_register(priv->phydev); if (ret) { + phy_device_free(priv->phydev); mdiobus_unregister(bus); dev_err(priv->dev, "phy_device_register err(%d)\n", ret); From ce605b68db5316f7def485d6c4f790b26174698b Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 19 Oct 2022 17:57:51 +0800 Subject: [PATCH 0515/1477] net: hinic: fix incorrect assignment issue in hinic_set_interrupt_cfg() [ Upstream commit c0605cd6750f2db9890c43a91ea4d77be8fb4908 ] The value of lli_credit_cnt is incorrectly assigned, fix it.
Fixes: a0337c0dee68 ("hinic: add support to set and get irq coalesce") Signed-off-by: Zhengchao Shao Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 799b85c88eff..bcf2476512a5 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -892,7 +892,7 @@ int hinic_set_interrupt_cfg(struct hinic_hwdev *hwdev, if (err) return -EINVAL; - interrupt_info->lli_credit_cnt = temp_info.lli_timer_cnt; + interrupt_info->lli_credit_cnt = temp_info.lli_credit_cnt; interrupt_info->lli_timer_cnt = temp_info.lli_timer_cnt; err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM, From bb01910763f935b16538084b4269696e0de17f79 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 19 Oct 2022 17:57:52 +0800 Subject: [PATCH 0516/1477] net: hinic: fix memory leak when reading function table [ Upstream commit 4c1f602df8956bc0decdafd7e4fc7eef50c550b1 ] When the input parameter idx meets the expected case option in hinic_dbg_get_func_table(), read_data is not released. Fix it. 
Fixes: 5215e16244ee ("hinic: add support to query function table") Signed-off-by: Zhengchao Shao Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/huawei/hinic/hinic_debugfs.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c index 19eb839177ec..061952c6c21a 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c @@ -85,6 +85,7 @@ static int hinic_dbg_get_func_table(struct hinic_dev *nic_dev, int idx) struct tag_sml_funcfg_tbl *funcfg_table_elem; struct hinic_cmd_lt_rd *read_data; u16 out_size = sizeof(*read_data); + int ret = ~0; int err; read_data = kzalloc(sizeof(*read_data), GFP_KERNEL); @@ -111,20 +112,25 @@ static int hinic_dbg_get_func_table(struct hinic_dev *nic_dev, int idx) switch (idx) { case VALID: - return funcfg_table_elem->dw0.bs.valid; + ret = funcfg_table_elem->dw0.bs.valid; + break; case RX_MODE: - return funcfg_table_elem->dw0.bs.nic_rx_mode; + ret = funcfg_table_elem->dw0.bs.nic_rx_mode; + break; case MTU: - return funcfg_table_elem->dw1.bs.mtu; + ret = funcfg_table_elem->dw1.bs.mtu; + break; case RQ_DEPTH: - return funcfg_table_elem->dw13.bs.cfg_rq_depth; + ret = funcfg_table_elem->dw13.bs.cfg_rq_depth; + break; case QUEUE_NUM: - return funcfg_table_elem->dw13.bs.cfg_q_num; + ret = funcfg_table_elem->dw13.bs.cfg_q_num; + break; } kfree(read_data); - return ~0; + return ret; } static ssize_t hinic_dbg_cmd_read(struct file *filp, char __user *buffer, size_t count, From 6603843c80b16957f5d7d14d897faf13cef2b8b9 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 19 Oct 2022 17:57:53 +0800 Subject: [PATCH 0517/1477] net: hinic: fix the issue of CMDQ memory leaks [ Upstream commit 363cc87767f6ddcfb9158ad2e2afa2f8d5c4b94e ] When hinic_set_cmdq_depth() fails in hinic_init_cmdqs(), the cmdq memory is not released correctly. 
Fix it. Fixes: 72ef908bb3ff ("hinic: add three net_device_ops of vf") Signed-off-by: Zhengchao Shao Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c index 21b8235952d3..dff979f5d08b 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c @@ -929,7 +929,7 @@ int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct hinic_hwif *hwif, err_set_cmdq_depth: hinic_ceq_unregister_cb(&func_to_io->ceqs, HINIC_CEQ_CMDQ); - + free_cmdq(&cmdqs->cmdq[HINIC_CMDQ_SYNC]); err_cmdq_ctxt: hinic_wqs_cmdq_free(&cmdqs->cmdq_pages, cmdqs->saved_wqs, HINIC_MAX_CMDQ_TYPES); From 0ce1ef335300e93f7c2b80e2e9454383ba361933 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 19 Oct 2022 17:57:54 +0800 Subject: [PATCH 0518/1477] net: hinic: fix the issue of double release MBOX callback of VF [ Upstream commit 8ec2f4c6b2e11a4249bba77460f0cfe6d95a82f8 ] In hinic_vf_func_init(), if VF fails to register information with PF through the MBOX, the MBOX callback function of VF is released once. But it is released again in hinic_init_hwdev(). Remove one. 
Fixes: 7dd29ee12865 ("hinic: add sriov feature support") Signed-off-by: Zhengchao Shao Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/huawei/hinic/hinic_sriov.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c index f8a26459ff65..4d82ebfe27f9 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c @@ -1178,7 +1178,6 @@ int hinic_vf_func_init(struct hinic_hwdev *hwdev) dev_err(&hwdev->hwif->pdev->dev, "Failed to register VF, err: %d, status: 0x%x, out size: 0x%x\n", err, register_info.status, out_size); - hinic_unregister_vf_mbox_cb(hwdev, HINIC_MOD_L2NIC); return -EIO; } } else { From b55d6ea965ba1f4c16c238af49f35b26f6bfc3fe Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Wed, 27 Jul 2022 11:15:06 +0800 Subject: [PATCH 0519/1477] x86/unwind/orc: Fix unreliable stack dump with gcov [ Upstream commit 230db82413c091bc16acee72650f48d419cebe49 ] When a console stack dump is initiated with CONFIG_GCOV_PROFILE_ALL enabled, show_trace_log_lvl() gets out of sync with the ORC unwinder, causing the stack trace to show all text addresses as unreliable: # echo l > /proc/sysrq-trigger [ 477.521031] sysrq: Show backtrace of all active CPUs [ 477.523813] NMI backtrace for cpu 0 [ 477.524492] CPU: 0 PID: 1021 Comm: bash Not tainted 6.0.0 #65 [ 477.525295] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-1.fc36 04/01/2014 [ 477.526439] Call Trace: [ 477.526854] [ 477.527216] ? dump_stack_lvl+0xc7/0x114 [ 477.527801] ? dump_stack+0x13/0x1f [ 477.528331] ? nmi_cpu_backtrace.cold+0xb5/0x10d [ 477.528998] ? lapic_can_unplug_cpu+0xa0/0xa0 [ 477.529641] ? nmi_trigger_cpumask_backtrace+0x16a/0x1f0 [ 477.530393] ? arch_trigger_cpumask_backtrace+0x1d/0x30 [ 477.531136] ? sysrq_handle_showallcpus+0x1b/0x30 [ 477.531818] ? __handle_sysrq.cold+0x4e/0x1ae [ 477.532451] ? 
write_sysrq_trigger+0x63/0x80 [ 477.533080] ? proc_reg_write+0x92/0x110 [ 477.533663] ? vfs_write+0x174/0x530 [ 477.534265] ? handle_mm_fault+0x16f/0x500 [ 477.534940] ? ksys_write+0x7b/0x170 [ 477.535543] ? __x64_sys_write+0x1d/0x30 [ 477.536191] ? do_syscall_64+0x6b/0x100 [ 477.536809] ? entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 477.537609] This happens when the compiled code for show_stack() has a single word on the stack, and doesn't use a tail call to show_stack_log_lvl(). (CONFIG_GCOV_PROFILE_ALL=y is the only known case of this.) Then the __unwind_start() skip logic hits an off-by-one bug and fails to unwind all the way to the intended starting frame. Fix it by reverting the following commit: f1d9a2abff66 ("x86/unwind/orc: Don't skip the first frame for inactive tasks") The original justification for that commit no longer exists. That original issue was later fixed in a different way, with the following commit: f2ac57a4c49d ("x86/unwind/orc: Fix inactive tasks with stack pointer in %sp on GCC 10 compiled kernels") Fixes: f1d9a2abff66 ("x86/unwind/orc: Don't skip the first frame for inactive tasks") Signed-off-by: Chen Zhongjin [jpoimboe: rewrite commit log] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra Signed-off-by: Sasha Levin --- arch/x86/kernel/unwind_orc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index cc071c4c6524..d557a545f4bc 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -697,7 +697,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, /* Otherwise, skip ahead to the user-specified starting frame: */ while (!unwind_done(state) && (!on_stack(&state->stack_info, first_frame, sizeof(long)) || - state->sp < (unsigned long)first_frame)) + state->sp <= (unsigned long)first_frame)) unwind_next_frame(state); return; From 17350734fdca3de84c6a0da5012f341dfd2ff75f Mon Sep 17 00:00:00 2001 From: Raju 
Rangoju Date: Thu, 20 Oct 2022 12:12:14 +0530 Subject: [PATCH 0520/1477] amd-xgbe: fix the SFP compliance codes check for DAC cables [ Upstream commit 09c5f6bf11ac98874339e55f4f5f79a9dbc9b375 ] The current XGBE code assumes that offset 6 of EEPROM SFP DAC (passive) cables is NULL. However, some cables (the 5 meter and 7 meter Molex passive cables) have non-zero data at offset 6. Fix the logic by moving the passive cable check above the active checks, so as not to be improperly identified as an active cable. This will fix the issue for any passive cable that advertises 1000Base-CX in offset 6. Fixes: abf0a1c2b26a ("amd-xgbe: Add support for SFP+ modules") Signed-off-by: Raju Rangoju Acked-by: Tom Lendacky Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 213769054391..21e38b720d87 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -1151,7 +1151,10 @@ static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata) } /* Determine the type of SFP */ - if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_SR) + if (phy_data->sfp_cable == XGBE_SFP_CABLE_PASSIVE && + xgbe_phy_sfp_bit_rate(sfp_eeprom, XGBE_SFP_SPEED_10000)) + phy_data->sfp_base = XGBE_SFP_BASE_10000_CR; + else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_SR) phy_data->sfp_base = XGBE_SFP_BASE_10000_SR; else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_LR) phy_data->sfp_base = XGBE_SFP_BASE_10000_LR; @@ -1167,9 +1170,6 @@ static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata) phy_data->sfp_base = XGBE_SFP_BASE_1000_CX; else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_T) phy_data->sfp_base = XGBE_SFP_BASE_1000_T; - else if ((phy_data->sfp_cable == 
XGBE_SFP_CABLE_PASSIVE) && - xgbe_phy_sfp_bit_rate(sfp_eeprom, XGBE_SFP_SPEED_10000)) - phy_data->sfp_base = XGBE_SFP_BASE_10000_CR; switch (phy_data->sfp_base) { case XGBE_SFP_BASE_1000_T: From d7ccd49c4dd9fad87581357263b6cae8a3393a07 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 20 Oct 2022 12:12:15 +0530 Subject: [PATCH 0521/1477] amd-xgbe: add the bit rate quirk for Molex cables [ Upstream commit 170a9e341a3b02c0b2ea0df16ef14a33a4f41de8 ] The offset 12 (bit-rate) of EEPROM SFP DAC (passive) cables is expected to be in the range 0x64 to 0x68. However, the 5 meter and 7 meter Molex passive cables have the rate ceiling 0x78 at offset 12. Add a quirk for Molex passive cables to extend the rate ceiling to 0x78. Fixes: abf0a1c2b26a ("amd-xgbe: Add support for SFP+ modules") Signed-off-by: Raju Rangoju Acked-by: Tom Lendacky Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 21e38b720d87..a7166cd1179f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -239,6 +239,7 @@ enum xgbe_sfp_speed { #define XGBE_SFP_BASE_BR_1GBE_MAX 0x0d #define XGBE_SFP_BASE_BR_10GBE_MIN 0x64 #define XGBE_SFP_BASE_BR_10GBE_MAX 0x68 +#define XGBE_MOLEX_SFP_BASE_BR_10GBE_MAX 0x78 #define XGBE_SFP_BASE_CU_CABLE_LEN 18 @@ -284,6 +285,8 @@ struct xgbe_sfp_eeprom { #define XGBE_BEL_FUSE_VENDOR "BEL-FUSE " #define XGBE_BEL_FUSE_PARTNO "1GBT-SFP06 " +#define XGBE_MOLEX_VENDOR "Molex Inc. 
" + struct xgbe_sfp_ascii { union { char vendor[XGBE_SFP_BASE_VENDOR_NAME_LEN + 1]; @@ -834,7 +837,11 @@ static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom, break; case XGBE_SFP_SPEED_10000: min = XGBE_SFP_BASE_BR_10GBE_MIN; - max = XGBE_SFP_BASE_BR_10GBE_MAX; + if (memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_NAME], + XGBE_MOLEX_VENDOR, XGBE_SFP_BASE_VENDOR_NAME_LEN) == 0) + max = XGBE_MOLEX_SFP_BASE_BR_10GBE_MAX; + else + max = XGBE_SFP_BASE_BR_10GBE_MAX; break; default: return false; From af7879529e5a859cb93a41b148d8d256046d1b28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Thu, 20 Oct 2022 09:53:10 +0200 Subject: [PATCH 0522/1477] atlantic: fix deadlock at aq_nic_stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6960d133f66ecddcd3af2b1cbd0c7dcd104268b8 ] NIC is stopped with rtnl_lock held, and during the stop it cancels the 'service_task' work and free irqs. However, if CONFIG_MACSEC is set, rtnl_lock is acquired both from aq_nic_service_task and aq_linkstate_threaded_isr. Then a deadlock happens if aq_nic_stop tries to cancel/disable them when they've already started their execution. As the deadlock is caused by rtnl_lock, it causes many other processes to stall, not only atlantic related stuff. Fix it by introducing a mutex that protects each NIC's macsec related data, and locking it instead of the rtnl_lock from the service task and the threaded IRQ. Before this patch, all macsec data was protected with rtnl_lock, but maybe not all of it needs to be protected. With this new mutex, further efforts can be made to limit the protected data only to that which requires it. However, probably it doesn't worth it because all macsec's data accesses are infrequent, and almost all are done from macsec_ops or ethtool callbacks, called holding rtnl_lock, so macsec_mutex won't never be much contended. 
The issue appeared when repeatedly attaching and detaching the NIC to a bond interface. Doing that after this patch I cannot reproduce the bug. Fixes: 62c1c2e606f6 ("net: atlantic: MACSec offload skeleton") Reported-by: Li Liang Suggested-by: Andrew Lunn Signed-off-by: Íñigo Huguet Reviewed-by: Igor Russkikh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/aquantia/atlantic/aq_macsec.c | 96 ++++++++++++++----- .../net/ethernet/aquantia/atlantic/aq_nic.h | 2 + 2 files changed, 74 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c index 4a6dfac857ca..7c6e0811f2e6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c @@ -1451,26 +1451,57 @@ static void aq_check_txsa_expiration(struct aq_nic_s *nic) egress_sa_threshold_expired); } +#define AQ_LOCKED_MDO_DEF(mdo) \ +static int aq_locked_mdo_##mdo(struct macsec_context *ctx) \ +{ \ + struct aq_nic_s *nic = netdev_priv(ctx->netdev); \ + int ret; \ + mutex_lock(&nic->macsec_mutex); \ + ret = aq_mdo_##mdo(ctx); \ + mutex_unlock(&nic->macsec_mutex); \ + return ret; \ +} + +AQ_LOCKED_MDO_DEF(dev_open) +AQ_LOCKED_MDO_DEF(dev_stop) +AQ_LOCKED_MDO_DEF(add_secy) +AQ_LOCKED_MDO_DEF(upd_secy) +AQ_LOCKED_MDO_DEF(del_secy) +AQ_LOCKED_MDO_DEF(add_rxsc) +AQ_LOCKED_MDO_DEF(upd_rxsc) +AQ_LOCKED_MDO_DEF(del_rxsc) +AQ_LOCKED_MDO_DEF(add_rxsa) +AQ_LOCKED_MDO_DEF(upd_rxsa) +AQ_LOCKED_MDO_DEF(del_rxsa) +AQ_LOCKED_MDO_DEF(add_txsa) +AQ_LOCKED_MDO_DEF(upd_txsa) +AQ_LOCKED_MDO_DEF(del_txsa) +AQ_LOCKED_MDO_DEF(get_dev_stats) +AQ_LOCKED_MDO_DEF(get_tx_sc_stats) +AQ_LOCKED_MDO_DEF(get_tx_sa_stats) +AQ_LOCKED_MDO_DEF(get_rx_sc_stats) +AQ_LOCKED_MDO_DEF(get_rx_sa_stats) + const struct macsec_ops aq_macsec_ops = { - .mdo_dev_open = aq_mdo_dev_open, - .mdo_dev_stop = aq_mdo_dev_stop, - .mdo_add_secy = aq_mdo_add_secy, - .mdo_upd_secy = aq_mdo_upd_secy, - .mdo_del_secy
= aq_mdo_del_secy, - .mdo_add_rxsc = aq_mdo_add_rxsc, - .mdo_upd_rxsc = aq_mdo_upd_rxsc, - .mdo_del_rxsc = aq_mdo_del_rxsc, - .mdo_add_rxsa = aq_mdo_add_rxsa, - .mdo_upd_rxsa = aq_mdo_upd_rxsa, - .mdo_del_rxsa = aq_mdo_del_rxsa, - .mdo_add_txsa = aq_mdo_add_txsa, - .mdo_upd_txsa = aq_mdo_upd_txsa, - .mdo_del_txsa = aq_mdo_del_txsa, - .mdo_get_dev_stats = aq_mdo_get_dev_stats, - .mdo_get_tx_sc_stats = aq_mdo_get_tx_sc_stats, - .mdo_get_tx_sa_stats = aq_mdo_get_tx_sa_stats, - .mdo_get_rx_sc_stats = aq_mdo_get_rx_sc_stats, - .mdo_get_rx_sa_stats = aq_mdo_get_rx_sa_stats, + .mdo_dev_open = aq_locked_mdo_dev_open, + .mdo_dev_stop = aq_locked_mdo_dev_stop, + .mdo_add_secy = aq_locked_mdo_add_secy, + .mdo_upd_secy = aq_locked_mdo_upd_secy, + .mdo_del_secy = aq_locked_mdo_del_secy, + .mdo_add_rxsc = aq_locked_mdo_add_rxsc, + .mdo_upd_rxsc = aq_locked_mdo_upd_rxsc, + .mdo_del_rxsc = aq_locked_mdo_del_rxsc, + .mdo_add_rxsa = aq_locked_mdo_add_rxsa, + .mdo_upd_rxsa = aq_locked_mdo_upd_rxsa, + .mdo_del_rxsa = aq_locked_mdo_del_rxsa, + .mdo_add_txsa = aq_locked_mdo_add_txsa, + .mdo_upd_txsa = aq_locked_mdo_upd_txsa, + .mdo_del_txsa = aq_locked_mdo_del_txsa, + .mdo_get_dev_stats = aq_locked_mdo_get_dev_stats, + .mdo_get_tx_sc_stats = aq_locked_mdo_get_tx_sc_stats, + .mdo_get_tx_sa_stats = aq_locked_mdo_get_tx_sa_stats, + .mdo_get_rx_sc_stats = aq_locked_mdo_get_rx_sc_stats, + .mdo_get_rx_sa_stats = aq_locked_mdo_get_rx_sa_stats, }; int aq_macsec_init(struct aq_nic_s *nic) @@ -1492,6 +1523,7 @@ int aq_macsec_init(struct aq_nic_s *nic) nic->ndev->features |= NETIF_F_HW_MACSEC; nic->ndev->macsec_ops = &aq_macsec_ops; + mutex_init(&nic->macsec_mutex); return 0; } @@ -1515,7 +1547,7 @@ int aq_macsec_enable(struct aq_nic_s *nic) if (!nic->macsec_cfg) return 0; - rtnl_lock(); + mutex_lock(&nic->macsec_mutex); if (nic->aq_fw_ops->send_macsec_req) { struct macsec_cfg_request cfg = { 0 }; @@ -1564,7 +1596,7 @@ int aq_macsec_enable(struct aq_nic_s *nic) ret = aq_apply_macsec_cfg(nic); 
unlock: - rtnl_unlock(); + mutex_unlock(&nic->macsec_mutex); return ret; } @@ -1576,9 +1608,9 @@ void aq_macsec_work(struct aq_nic_s *nic) if (!netif_carrier_ok(nic->ndev)) return; - rtnl_lock(); + mutex_lock(&nic->macsec_mutex); aq_check_txsa_expiration(nic); - rtnl_unlock(); + mutex_unlock(&nic->macsec_mutex); } int aq_macsec_rx_sa_cnt(struct aq_nic_s *nic) @@ -1589,21 +1621,30 @@ int aq_macsec_rx_sa_cnt(struct aq_nic_s *nic) if (!cfg) return 0; + mutex_lock(&nic->macsec_mutex); + for (i = 0; i < AQ_MACSEC_MAX_SC; i++) { if (!test_bit(i, &cfg->rxsc_idx_busy)) continue; cnt += hweight_long(cfg->aq_rxsc[i].rx_sa_idx_busy); } + mutex_unlock(&nic->macsec_mutex); return cnt; } int aq_macsec_tx_sc_cnt(struct aq_nic_s *nic) { + int cnt; + if (!nic->macsec_cfg) return 0; - return hweight_long(nic->macsec_cfg->txsc_idx_busy); + mutex_lock(&nic->macsec_mutex); + cnt = hweight_long(nic->macsec_cfg->txsc_idx_busy); + mutex_unlock(&nic->macsec_mutex); + + return cnt; } int aq_macsec_tx_sa_cnt(struct aq_nic_s *nic) @@ -1614,12 +1655,15 @@ int aq_macsec_tx_sa_cnt(struct aq_nic_s *nic) if (!cfg) return 0; + mutex_lock(&nic->macsec_mutex); + for (i = 0; i < AQ_MACSEC_MAX_SC; i++) { if (!test_bit(i, &cfg->txsc_idx_busy)) continue; cnt += hweight_long(cfg->aq_txsc[i].tx_sa_idx_busy); } + mutex_unlock(&nic->macsec_mutex); return cnt; } @@ -1691,6 +1735,8 @@ u64 *aq_macsec_get_stats(struct aq_nic_s *nic, u64 *data) if (!cfg) return data; + mutex_lock(&nic->macsec_mutex); + aq_macsec_update_stats(nic); common_stats = &cfg->stats; @@ -1773,5 +1819,7 @@ u64 *aq_macsec_get_stats(struct aq_nic_s *nic, u64 *data) data += i; + mutex_unlock(&nic->macsec_mutex); + return data; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 926cca9a0c83..6da3efa289a3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -152,6 +152,8 @@ struct aq_nic_s { struct mutex 
fwreq_mutex; #if IS_ENABLED(CONFIG_MACSEC) struct aq_macsec_cfg *macsec_cfg; + /* mutex to protect data in macsec_cfg */ + struct mutex macsec_mutex; #endif /* PTP support */ struct aq_ptp_s *aq_ptp; From c325f92d8d9b223d5842609ca067e898e9d34566 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Oct 2022 22:45:11 +0000 Subject: [PATCH 0523/1477] kcm: annotate data-races around kcm->rx_psock [ Upstream commit 15e4dabda11b0fa31d510a915d1a580f47dfc92e ] kcm->rx_psock can be read locklessly in kcm_rfree(). Annotate the read and writes accordingly. We do the same for kcm->rx_wait in the following patch. syzbot reported: BUG: KCSAN: data-race in kcm_rfree / unreserve_rx_kcm write to 0xffff888123d827b8 of 8 bytes by task 2758 on cpu 1: unreserve_rx_kcm+0x72/0x1f0 net/kcm/kcmsock.c:313 kcm_rcv_strparser+0x2b5/0x3a0 net/kcm/kcmsock.c:373 __strp_recv+0x64c/0xd20 net/strparser/strparser.c:301 strp_recv+0x6d/0x80 net/strparser/strparser.c:335 tcp_read_sock+0x13e/0x5a0 net/ipv4/tcp.c:1703 strp_read_sock net/strparser/strparser.c:358 [inline] do_strp_work net/strparser/strparser.c:406 [inline] strp_work+0xe8/0x180 net/strparser/strparser.c:415 process_one_work+0x3d3/0x720 kernel/workqueue.c:2289 worker_thread+0x618/0xa70 kernel/workqueue.c:2436 kthread+0x1a9/0x1e0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 read to 0xffff888123d827b8 of 8 bytes by task 5859 on cpu 0: kcm_rfree+0x14c/0x220 net/kcm/kcmsock.c:181 skb_release_head_state+0x8e/0x160 net/core/skbuff.c:841 skb_release_all net/core/skbuff.c:852 [inline] __kfree_skb net/core/skbuff.c:868 [inline] kfree_skb_reason+0x5c/0x260 net/core/skbuff.c:891 kfree_skb include/linux/skbuff.h:1216 [inline] kcm_recvmsg+0x226/0x2b0 net/kcm/kcmsock.c:1161 ____sys_recvmsg+0x16c/0x2e0 ___sys_recvmsg net/socket.c:2743 [inline] do_recvmmsg+0x2f1/0x710 net/socket.c:2837 __sys_recvmmsg net/socket.c:2916 [inline] __do_sys_recvmmsg net/socket.c:2939 [inline] __se_sys_recvmmsg net/socket.c:2932 [inline] 
__x64_sys_recvmmsg+0xde/0x160 net/socket.c:2932 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0xffff88812971ce00 -> 0x0000000000000000 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 5859 Comm: syz-executor.3 Not tainted 6.0.0-syzkaller-12189-g19d17ab7c68b-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/kcm/kcmsock.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 18469f1f707e..a1dc2b74b52f 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -177,7 +177,7 @@ static void kcm_rfree(struct sk_buff *skb) /* For reading rx_wait and rx_psock without holding lock */ smp_mb__after_atomic(); - if (!kcm->rx_wait && !kcm->rx_psock && + if (!kcm->rx_wait && !READ_ONCE(kcm->rx_psock) && sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { spin_lock_bh(&mux->rx_lock); kcm_rcv_ready(kcm); @@ -282,7 +282,8 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, kcm->rx_wait = false; psock->rx_kcm = kcm; - kcm->rx_psock = psock; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_psock, psock); spin_unlock_bh(&mux->rx_lock); @@ -309,7 +310,8 @@ static void unreserve_rx_kcm(struct kcm_psock *psock, spin_lock_bh(&mux->rx_lock); psock->rx_kcm = NULL; - kcm->rx_psock = NULL; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_psock, NULL); /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with * kcm_rfree From e2a28807b1ceaa309164b92c38d73d12feea33df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Oct 2022 22:45:12 +0000 Subject: [PATCH 0524/1477] kcm: annotate data-races around kcm->rx_wait [ Upstream commit 
0c745b5141a45a076f1cb9772a399f7ebcb0948a ] kcm->rx_wait can be read locklessly in kcm_rfree(). Annotate the read and writes accordingly. syzbot reported: BUG: KCSAN: data-race in kcm_rcv_strparser / kcm_rfree write to 0xffff88810784e3d0 of 1 bytes by task 1823 on cpu 1: reserve_rx_kcm net/kcm/kcmsock.c:283 [inline] kcm_rcv_strparser+0x250/0x3a0 net/kcm/kcmsock.c:363 __strp_recv+0x64c/0xd20 net/strparser/strparser.c:301 strp_recv+0x6d/0x80 net/strparser/strparser.c:335 tcp_read_sock+0x13e/0x5a0 net/ipv4/tcp.c:1703 strp_read_sock net/strparser/strparser.c:358 [inline] do_strp_work net/strparser/strparser.c:406 [inline] strp_work+0xe8/0x180 net/strparser/strparser.c:415 process_one_work+0x3d3/0x720 kernel/workqueue.c:2289 worker_thread+0x618/0xa70 kernel/workqueue.c:2436 kthread+0x1a9/0x1e0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 read to 0xffff88810784e3d0 of 1 bytes by task 17869 on cpu 0: kcm_rfree+0x121/0x220 net/kcm/kcmsock.c:181 skb_release_head_state+0x8e/0x160 net/core/skbuff.c:841 skb_release_all net/core/skbuff.c:852 [inline] __kfree_skb net/core/skbuff.c:868 [inline] kfree_skb_reason+0x5c/0x260 net/core/skbuff.c:891 kfree_skb include/linux/skbuff.h:1216 [inline] kcm_recvmsg+0x226/0x2b0 net/kcm/kcmsock.c:1161 ____sys_recvmsg+0x16c/0x2e0 ___sys_recvmsg net/socket.c:2743 [inline] do_recvmmsg+0x2f1/0x710 net/socket.c:2837 __sys_recvmmsg net/socket.c:2916 [inline] __do_sys_recvmmsg net/socket.c:2939 [inline] __se_sys_recvmmsg net/socket.c:2932 [inline] __x64_sys_recvmmsg+0xde/0x160 net/socket.c:2932 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x01 -> 0x00 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 17869 Comm: syz-executor.2 Not tainted 6.1.0-rc1-syzkaller-00010-gbb1a1146467a-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 Fixes: ab7ac4eb9832 ("kcm:
Kernel Connection Multiplexor module") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/kcm/kcmsock.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a1dc2b74b52f..6b362b362f79 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -161,7 +161,8 @@ static void kcm_rcv_ready(struct kcm_sock *kcm) /* Buffer limit is okay now, add to ready list */ list_add_tail(&kcm->wait_rx_list, &kcm->mux->kcm_rx_waiters); - kcm->rx_wait = true; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_wait, true); } static void kcm_rfree(struct sk_buff *skb) @@ -177,7 +178,7 @@ static void kcm_rfree(struct sk_buff *skb) /* For reading rx_wait and rx_psock without holding lock */ smp_mb__after_atomic(); - if (!kcm->rx_wait && !READ_ONCE(kcm->rx_psock) && + if (!READ_ONCE(kcm->rx_wait) && !READ_ONCE(kcm->rx_psock) && sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { spin_lock_bh(&mux->rx_lock); kcm_rcv_ready(kcm); @@ -236,7 +237,8 @@ try_again: if (kcm_queue_rcv_skb(&kcm->sk, skb)) { /* Should mean socket buffer full */ list_del(&kcm->wait_rx_list); - kcm->rx_wait = false; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_wait, false); /* Commit rx_wait to read in kcm_free */ smp_wmb(); @@ -279,7 +281,8 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, kcm = list_first_entry(&mux->kcm_rx_waiters, struct kcm_sock, wait_rx_list); list_del(&kcm->wait_rx_list); - kcm->rx_wait = false; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_wait, false); psock->rx_kcm = kcm; /* paired with lockless reads in kcm_rfree() */ @@ -1241,7 +1244,8 @@ static void kcm_recv_disable(struct kcm_sock *kcm) if (!kcm->rx_psock) { if (kcm->rx_wait) { list_del(&kcm->wait_rx_list); - kcm->rx_wait = false; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_wait, false); } 
requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); @@ -1794,7 +1798,8 @@ static void kcm_done(struct kcm_sock *kcm) if (kcm->rx_wait) { list_del(&kcm->wait_rx_list); - kcm->rx_wait = false; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_wait, false); } /* Move any pending receive messages to other kcm sockets */ requeue_rx_msgs(mux, &sk->sk_receive_queue); From c3edc6e808209aa705185f732e682a370981ced1 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 20 Oct 2022 10:42:13 +0800 Subject: [PATCH 0525/1477] net: fix UAF issue in nfqnl_nf_hook_drop() when ops_init() failed [ Upstream commit d266935ac43d57586e311a087510fe6a084af742 ] When the ops_init() interface is invoked to initialize the net, but ops->init() fails, data is released. However, the ptr pointer in net->gen is invalid. In this case, when nfqnl_nf_hook_drop() is invoked to release the net, invalid address access occurs. The process is as follows: setup_net() ops_init() data = kzalloc(...) ---> alloc "data" net_assign_generic() ---> assign "data" to ptr in net->gen ... ops->init() ---> failed ... kfree(data); ---> ptr in net->gen is invalid ... ops_exit_list() ...
nfqnl_nf_hook_drop() *q = nfnl_queue_pernet(net) ---> q is invalid The following is the Call Trace information: BUG: KASAN: use-after-free in nfqnl_nf_hook_drop+0x264/0x280 Read of size 8 at addr ffff88810396b240 by task ip/15855 Call Trace: dump_stack_lvl+0x8e/0xd1 print_report+0x155/0x454 kasan_report+0xba/0x1f0 nfqnl_nf_hook_drop+0x264/0x280 nf_queue_nf_hook_drop+0x8b/0x1b0 __nf_unregister_net_hook+0x1ae/0x5a0 nf_unregister_net_hooks+0xde/0x130 ops_exit_list+0xb0/0x170 setup_net+0x7ac/0xbd0 copy_net_ns+0x2e6/0x6b0 create_new_namespaces+0x382/0xa50 unshare_nsproxy_namespaces+0xa6/0x1c0 ksys_unshare+0x3a4/0x7e0 __x64_sys_unshare+0x2d/0x40 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Allocated by task 15855: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 __kasan_kmalloc+0xa1/0xb0 __kmalloc+0x49/0xb0 ops_init+0xe7/0x410 setup_net+0x5aa/0xbd0 copy_net_ns+0x2e6/0x6b0 create_new_namespaces+0x382/0xa50 unshare_nsproxy_namespaces+0xa6/0x1c0 ksys_unshare+0x3a4/0x7e0 __x64_sys_unshare+0x2d/0x40 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Freed by task 15855: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x40 ____kasan_slab_free+0x155/0x1b0 slab_free_freelist_hook+0x11b/0x220 __kmem_cache_free+0xa4/0x360 ops_init+0xb9/0x410 setup_net+0x5aa/0xbd0 copy_net_ns+0x2e6/0x6b0 create_new_namespaces+0x382/0xa50 unshare_nsproxy_namespaces+0xa6/0x1c0 ksys_unshare+0x3a4/0x7e0 __x64_sys_unshare+0x2d/0x40 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: f875bae06533 ("net: Automatically allocate per namespace data.") Signed-off-by: Zhengchao Shao Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/core/net_namespace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index cbff7d94b993..a3b7d965e9c0 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -135,6 +135,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) static int ops_init(const struct pernet_operations *ops, struct net *net) { + struct net_generic *ng; int err = -ENOMEM; void *data = NULL; @@ -153,7 +154,13 @@ static int ops_init(const struct pernet_operations *ops, struct net *net) if (!err) return 0; + if (ops->id && ops->size) { cleanup: + ng = rcu_dereference_protected(net->gen, + lockdep_is_held(&pernet_ops_rwsem)); + ng->ptr[*ops->id] = NULL; + } + kfree(data); out: From aab883bd60bcd682b1ccd0bf94989fbabf2f9ad5 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 21 Oct 2022 09:32:24 +0800 Subject: [PATCH 0526/1477] net: lantiq_etop: don't free skb when returning NETDEV_TX_BUSY [ Upstream commit 9c1eaa27ec599fcc25ed4970c0b73c247d147a2b ] The ndo_start_xmit() method must not free skb when returning NETDEV_TX_BUSY, since caller is going to requeue freed skb. Fixes: 504d4721ee8e ("MIPS: Lantiq: Add ethernet driver") Signed-off-by: Zhang Changzhong Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/lantiq_etop.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 2d0c52f7106b..5ea626b1e578 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -466,7 +466,6 @@ ltq_etop_tx(struct sk_buff *skb, struct net_device *dev) len = skb->len < ETH_ZLEN ? 
ETH_ZLEN : skb->len; if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) || ch->skb[ch->dma.desc]) { - dev_kfree_skb_any(skb); netdev_err(dev, "tx ring full\n"); netif_tx_stop_queue(txq); return NETDEV_TX_BUSY; From 49713d7c38588311815889cb8c766591255ec836 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:30 -0800 Subject: [PATCH 0527/1477] tcp: minor optimization in tcp_add_backlog() [ Upstream commit d519f350967a60b85a574ad8aeac43f2b4384746 ] If packet is going to be coalesced, sk_sndbuf/sk_rcvbuf values are not used. Defer their access to the point we need them. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Stable-dep-of: ec791d8149ff ("tcp: fix a signed-integer-overflow bug in tcp_add_backlog()") Signed-off-by: Sasha Levin --- net/ipv4/tcp_ipv4.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5c1e6b0687e2..78cef6930484 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1770,8 +1770,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { - u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf); - u32 tail_gso_size, tail_gso_segs; + u32 limit, tail_gso_size, tail_gso_segs; struct skb_shared_info *shinfo; const struct tcphdr *th; struct tcphdr *thtail; @@ -1878,7 +1877,7 @@ no_coalesce: * to reduce memory overhead, so add a little headroom here. * Few sockets backlog are possibly concurrently non empty. 
*/ - limit += 64*1024; + limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf) + 64*1024; if (unlikely(sk_add_backlog(sk, skb, limit))) { bh_unlock_sock(sk); From 4f23cb2be530785db284a685d1b1c30224d8a538 Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Fri, 21 Oct 2022 12:06:22 +0800 Subject: [PATCH 0528/1477] tcp: fix a signed-integer-overflow bug in tcp_add_backlog() [ Upstream commit ec791d8149ff60c40ad2074af3b92a39c916a03f ] The type of sk_rcvbuf and sk_sndbuf in struct sock is int, and in tcp_add_backlog(), the variable limit is calculated by adding sk_rcvbuf, sk_sndbuf and 64 * 1024, it may exceed the max value of int and overflow. This patch reduces the limit budget by halving the sndbuf to solve this issue since ACK packets are much smaller than the payload. Fixes: c9c3321257e1 ("tcp: add tcp_add_backlog()") Signed-off-by: Lu Wei Reviewed-by: Eric Dumazet Acked-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_ipv4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 78cef6930484..31a8009f74ee 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1873,11 +1873,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) __skb_push(skb, hdrlen); no_coalesce: + limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1); + /* Only socket owner can try to collapse/prune rx queues * to reduce memory overhead, so add a little headroom here. * Few sockets backlog are possibly concurrently non empty.
*/ - limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf) + 64*1024; + limit += 64 * 1024; if (unlikely(sk_add_backlog(sk, skb, limit))) { bh_unlock_sock(sk); From 1634d5d39cfdf10c79a44e5a75cf47dd266ca704 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 21 Oct 2022 17:08:21 +0000 Subject: [PATCH 0529/1477] tcp: fix indefinite deferral of RTO with SACK reneging [ Upstream commit 3d2af9cce3133b3bc596a9d065c6f9d93419ccfb ] This commit fixes a bug that can cause a TCP data sender to repeatedly defer RTOs when encountering SACK reneging. The bug is that when we're in fast recovery in a scenario with SACK reneging, every time we get an ACK we call tcp_check_sack_reneging() and it can note the apparent SACK reneging and rearm the RTO timer for srtt/2 into the future. In some SACK reneging scenarios that can happen repeatedly until the receive window fills up, at which point the sender can't send any more, the ACKs stop arriving, and the RTO fires at srtt/2 after the last ACK. But that can take far too long (O(10 secs)), since the connection is stuck in fast recovery with a low cwnd that cannot grow beyond ssthresh, even if more bandwidth is available. This fix changes the logic in tcp_check_sack_reneging() to only rearm the RTO timer if data is cumulatively ACKed, indicating forward progress. This avoids this kind of nearly infinite loop of RTO timer re-arming. In addition, this meets the goals of tcp_check_sack_reneging() in handling Windows TCP behavior that looks temporarily like SACK reneging but is not really. Many thanks to Jakub Kicinski and Neil Spring, who reported this issue and provided critical packet traces that enabled root-causing this issue. Also, many thanks to Jakub Kicinski for testing this fix. 
Fixes: 5ae344c949e7 ("tcp: reduce spurious retransmits due to transient SACK reneging") Reported-by: Jakub Kicinski Reported-by: Neil Spring Signed-off-by: Neal Cardwell Reviewed-by: Eric Dumazet Cc: Yuchung Cheng Tested-by: Jakub Kicinski Link: https://lore.kernel.org/r/20221021170821.1093930-1-ncardwell.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 377cba9b124d..541758cd0b81 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2175,7 +2175,8 @@ void tcp_enter_loss(struct sock *sk) */ static bool tcp_check_sack_reneging(struct sock *sk, int flag) { - if (flag & FLAG_SACK_RENEGING) { + if (flag & FLAG_SACK_RENEGING && + flag & FLAG_SND_UNA_ADVANCED) { struct tcp_sock *tp = tcp_sk(sk); unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4), msecs_to_jiffies(10)); From 6b2d07fc0b0a3f99d1ef918d42bd10ca3c497a54 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Mon, 24 Oct 2022 19:48:07 +0800 Subject: [PATCH 0530/1477] can: mscan: mpc5xxx: mpc5xxx_can_probe(): add missing put_clock() in error path [ Upstream commit 3e5b3418827cefb5e1cc658806f02965791b8f07 ] The commit 1149108e2fbf ("can: mscan: improve clock API use") only adds put_clock() in mpc5xxx_can_remove() function, forgetting to add put_clock() in the error handling code. Fix this bug by adding put_clock() in the error handling code. 
Fixes: 1149108e2fbf ("can: mscan: improve clock API use") Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/all/20221024133828.35881-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/mscan/mpc5xxx_can.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index e254e04ae257..ef649764f9b4 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -325,14 +325,14 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev) &mscan_clksrc); if (!priv->can.clock.freq) { dev_err(&ofdev->dev, "couldn't get MSCAN clock properties\n"); - goto exit_free_mscan; + goto exit_put_clock; } err = register_mscandev(dev, mscan_clksrc); if (err) { dev_err(&ofdev->dev, "registering %s failed (err=%d)\n", DRV_NAME, err); - goto exit_free_mscan; + goto exit_put_clock; } dev_info(&ofdev->dev, "MSCAN at 0x%p, irq %d, clock %d Hz\n", @@ -340,7 +340,9 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev) return 0; -exit_free_mscan: +exit_put_clock: + if (data->put_clock) + data->put_clock(ofdev); free_candev(dev); exit_dispose_irq: irq_dispose_mapping(irq); From 0eb19ecbd0a97304e8fa400c34c9e076ac35661f Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Mon, 24 Oct 2022 17:02:52 +0800 Subject: [PATCH 0531/1477] can: mcp251x: mcp251x_can_probe(): add missing unregister_candev() in error path [ Upstream commit b1a09b63684cea56774786ca14c13b7041ffee63 ] In mcp251x_can_probe(), if mcp251x_gpio_setup() fails, it forgets to unregister the CAN device. Fix this by unregistering can device in mcp251x_can_probe(). 
Fixes: 2d52dabbef60 ("can: mcp251x: add GPIO support") Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/all/20221024090256.717236-1-dzm91@hust.edu.cn [mkl: adjust label] Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/spi/mcp251x.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 5dde3c42d241..ffcb04aac972 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1419,11 +1419,14 @@ static int mcp251x_can_probe(struct spi_device *spi) ret = mcp251x_gpio_setup(priv); if (ret) - goto error_probe; + goto out_unregister_candev; netdev_info(net, "MCP%x successfully initialized.\n", priv->model); return 0; +out_unregister_candev: + unregister_candev(net); + error_probe: destroy_workqueue(priv->wq); priv->wq = NULL; From 3221c2701d191f2ba6442c933154fa7e49b028da Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 12 Oct 2022 22:50:17 -0500 Subject: [PATCH 0532/1477] PM: hibernate: Allow hybrid sleep to work with s2idle [ Upstream commit 85850af4fc47132f3f2f0dd698b90f67906600b4 ] Hybrid sleep is currently hardcoded to only operate with S3 even on systems that might not support it. Instead of assuming this mode is what the user wants to use, for hybrid sleep follow the setting of `mem_sleep_current` which will respect mem_sleep_default kernel command line and policy decisions made by the presence of the FADT low power idle bit. Fixes: 81d45bdf8913 ("PM / hibernate: Untangle power_down()") Reported-and-tested-by: kolAflash Link: https://bugzilla.kernel.org/show_bug.cgi?id=216574 Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Sasha Levin --- kernel/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 522cb1387462..59a1b126c369 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -637,7 +637,7 @@ static void power_down(void) int error; if (hibernation_mode == HIBERNATION_SUSPEND) { - error = suspend_devices_and_enter(PM_SUSPEND_MEM); + error = suspend_devices_and_enter(mem_sleep_current); if (error) { hibernation_mode = hibernation_ops ? HIBERNATION_PLATFORM : From 1cf51d51581c1e0a876623e0a89d10029fc8cdc4 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 12 Oct 2022 15:32:28 +0100 Subject: [PATCH 0533/1477] media: vivid: s_fbuf: add more sanity checks [ Upstream commit f8bcaf714abfc94818dff8c0db84d750433984f4 ] VIDIOC_S_FBUF is by definition a scary ioctl, which is why only root can use it. But at least check if the framebuffer parameters match that of one of the framebuffer created by vivid, and reject anything else. Signed-off-by: Hans Verkuil Fixes: ef834f7836ec ([media] vivid: add the video capture and output parts) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/test-drivers/vivid/vivid-core.c | 22 +++++++++++++++++++ drivers/media/test-drivers/vivid/vivid-core.h | 2 ++ .../media/test-drivers/vivid/vivid-vid-cap.c | 9 +++++++- 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/media/test-drivers/vivid/vivid-core.c b/drivers/media/test-drivers/vivid/vivid-core.c index 1e356dc65d31..f69c64e5a149 100644 --- a/drivers/media/test-drivers/vivid/vivid-core.c +++ b/drivers/media/test-drivers/vivid/vivid-core.c @@ -330,6 +330,28 @@ static int vidioc_g_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *a return vivid_vid_out_g_fbuf(file, fh, a); } +/* + * Only support the framebuffer of one of the vivid instances. + * Anything else is rejected. 
+ */ +bool vivid_validate_fb(const struct v4l2_framebuffer *a) +{ + struct vivid_dev *dev; + int i; + + for (i = 0; i < n_devs; i++) { + dev = vivid_devs[i]; + if (!dev || !dev->video_pbase) + continue; + if ((unsigned long)a->base == dev->video_pbase && + a->fmt.width <= dev->display_width && + a->fmt.height <= dev->display_height && + a->fmt.bytesperline <= dev->display_byte_stride) + return true; + } + return false; +} + static int vidioc_s_fbuf(struct file *file, void *fh, const struct v4l2_framebuffer *a) { struct video_device *vdev = video_devdata(file); diff --git a/drivers/media/test-drivers/vivid/vivid-core.h b/drivers/media/test-drivers/vivid/vivid-core.h index 99e69b8f770f..6aa32c8e6fb5 100644 --- a/drivers/media/test-drivers/vivid/vivid-core.h +++ b/drivers/media/test-drivers/vivid/vivid-core.h @@ -609,4 +609,6 @@ static inline bool vivid_is_hdmi_out(const struct vivid_dev *dev) return dev->output_type[dev->output] == HDMI; } +bool vivid_validate_fb(const struct v4l2_framebuffer *a); + #endif diff --git a/drivers/media/test-drivers/vivid/vivid-vid-cap.c b/drivers/media/test-drivers/vivid/vivid-vid-cap.c index eadf28ab1e39..d4e30cf64e5f 100644 --- a/drivers/media/test-drivers/vivid/vivid-vid-cap.c +++ b/drivers/media/test-drivers/vivid/vivid-vid-cap.c @@ -1276,7 +1276,14 @@ int vivid_vid_cap_s_fbuf(struct file *file, void *fh, return -EINVAL; if (a->fmt.bytesperline < (a->fmt.width * fmt->bit_depth[0]) / 8) return -EINVAL; - if (a->fmt.height * a->fmt.bytesperline < a->fmt.sizeimage) + if (a->fmt.bytesperline > a->fmt.sizeimage / a->fmt.height) + return -EINVAL; + + /* + * Only support the framebuffer of one of the vivid instances. + * Anything else is rejected. 
+ */ + if (!vivid_validate_fb(a)) return -EINVAL; dev->fb_vbase_cap = phys_to_virt((unsigned long)a->base); From 147b8f1892aaa474f912ac75babfd316ee0de672 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 13 Oct 2022 15:18:46 +0100 Subject: [PATCH 0534/1477] media: vivid: dev->bitmap_cap wasn't freed in all cases [ Upstream commit 1f65ea411cc7b6ff128d82a3493d7b5648054e6f ] Whenever the compose width/height values change, the dev->bitmap_cap vmalloc'ed array must be freed and dev->bitmap_cap set to NULL. This was done in some places, but not all. This is only an issue if overlay support is enabled and the bitmap clipping is used. Signed-off-by: Hans Verkuil Fixes: ef834f7836ec ([media] vivid: add the video capture and output parts) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- .../media/test-drivers/vivid/vivid-vid-cap.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/media/test-drivers/vivid/vivid-vid-cap.c b/drivers/media/test-drivers/vivid/vivid-vid-cap.c index d4e30cf64e5f..d493bd17481b 100644 --- a/drivers/media/test-drivers/vivid/vivid-vid-cap.c +++ b/drivers/media/test-drivers/vivid/vivid-vid-cap.c @@ -452,6 +452,12 @@ void vivid_update_format_cap(struct vivid_dev *dev, bool keep_controls) tpg_reset_source(&dev->tpg, dev->src_rect.width, dev->src_rect.height, dev->field_cap); dev->crop_cap = dev->src_rect; dev->crop_bounds_cap = dev->src_rect; + if (dev->bitmap_cap && + (dev->compose_cap.width != dev->crop_cap.width || + dev->compose_cap.height != dev->crop_cap.height)) { + vfree(dev->bitmap_cap); + dev->bitmap_cap = NULL; + } dev->compose_cap = dev->crop_cap; if (V4L2_FIELD_HAS_T_OR_B(dev->field_cap)) dev->compose_cap.height /= 2; @@ -909,6 +915,8 @@ int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection struct vivid_dev *dev = video_drvdata(file); struct v4l2_rect *crop = &dev->crop_cap; struct v4l2_rect *compose = &dev->compose_cap; + unsigned orig_compose_w = 
compose->width; + unsigned orig_compose_h = compose->height; unsigned factor = V4L2_FIELD_HAS_T_OR_B(dev->field_cap) ? 2 : 1; int ret; @@ -1025,17 +1033,17 @@ int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection s->r.height /= factor; } v4l2_rect_map_inside(&s->r, &dev->fmt_cap_rect); - if (dev->bitmap_cap && (compose->width != s->r.width || - compose->height != s->r.height)) { - vfree(dev->bitmap_cap); - dev->bitmap_cap = NULL; - } *compose = s->r; break; default: return -EINVAL; } + if (dev->bitmap_cap && (compose->width != orig_compose_w || + compose->height != orig_compose_h)) { + vfree(dev->bitmap_cap); + dev->bitmap_cap = NULL; + } tpg_s_crop_compose(&dev->tpg, crop, compose); return 0; } From 683015ae163481457a16fad2317af66360dc4762 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 13 Oct 2022 09:00:34 +0100 Subject: [PATCH 0535/1477] media: v4l2-dv-timings: add sanity checks for blanking values [ Upstream commit 4b6d66a45ed34a15721cb9e11492fa1a24bc83df ] Add sanity checks to v4l2_valid_dv_timings() to ensure that the provided blanking values are reasonable. 
Signed-off-by: Hans Verkuil Fixes: b18787ed1ce3 ([media] v4l2-dv-timings: add new helper module) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/v4l2-core/v4l2-dv-timings.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index af48705c704f..003c32fed3f7 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -161,6 +161,20 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t, (bt->interlaced && !(caps & V4L2_DV_BT_CAP_INTERLACED)) || (!bt->interlaced && !(caps & V4L2_DV_BT_CAP_PROGRESSIVE))) return false; + + /* sanity checks for the blanking timings */ + if (!bt->interlaced && + (bt->il_vbackporch || bt->il_vsync || bt->il_vfrontporch)) + return false; + if (bt->hfrontporch > 2 * bt->width || + bt->hsync > 1024 || bt->hbackporch > 1024) + return false; + if (bt->vfrontporch > 4096 || + bt->vsync > 128 || bt->vbackporch > 4096) + return false; + if (bt->interlaced && (bt->il_vfrontporch > 4096 || + bt->il_vsync > 128 || bt->il_vbackporch > 4096)) + return false; return fnc == NULL || fnc(t, fnc_handle); } EXPORT_SYMBOL_GPL(v4l2_valid_dv_timings); From b6c7446d0a38725c64305bfb4728625d4f411f50 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 12 Oct 2022 16:46:17 +0100 Subject: [PATCH 0536/1477] media: videodev2.h: V4L2_DV_BT_BLANKING_HEIGHT should check 'interlaced' [ Upstream commit 8da7f0976b9071b528c545008de9d10cc81883b1 ] If it is a progressive (non-interlaced) format, then ignore the interlaced timing values. 
Signed-off-by: Hans Verkuil Fixes: 7f68127fa11f ([media] videodev2.h: defines to calculate blanking and frame sizes) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- include/uapi/linux/videodev2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 534eaa4d39bc..b28817c59fdf 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1552,7 +1552,8 @@ struct v4l2_bt_timings { ((bt)->width + V4L2_DV_BT_BLANKING_WIDTH(bt)) #define V4L2_DV_BT_BLANKING_HEIGHT(bt) \ ((bt)->vfrontporch + (bt)->vsync + (bt)->vbackporch + \ - (bt)->il_vfrontporch + (bt)->il_vsync + (bt)->il_vbackporch) + ((bt)->interlaced ? \ + ((bt)->il_vfrontporch + (bt)->il_vsync + (bt)->il_vbackporch) : 0)) #define V4L2_DV_BT_FRAME_HEIGHT(bt) \ ((bt)->height + V4L2_DV_BT_BLANKING_HEIGHT(bt)) From 905f05c0ab1950e6f24611b2ea69625f154392d5 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 17 Oct 2022 15:09:06 +0100 Subject: [PATCH 0537/1477] media: vivid: set num_in/outputs to 0 if not supported [ Upstream commit 69d78a80da4ef12faf2a6f9cfa2097ab4ac43983 ] If node_types does not have video/vbi/meta inputs or outputs, then set num_inputs/num_outputs to 0 instead of 1. Signed-off-by: Hans Verkuil Fixes: 0c90f649d2f5 (media: vivid: add vivid_create_queue() helper) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/test-drivers/vivid/vivid-core.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/media/test-drivers/vivid/vivid-core.c b/drivers/media/test-drivers/vivid/vivid-core.c index f69c64e5a149..761d2abd4006 100644 --- a/drivers/media/test-drivers/vivid/vivid-core.c +++ b/drivers/media/test-drivers/vivid/vivid-core.c @@ -872,8 +872,12 @@ static int vivid_detect_feature_set(struct vivid_dev *dev, int inst, /* how many inputs do we have and of what type? 
*/ dev->num_inputs = num_inputs[inst]; - if (dev->num_inputs < 1) - dev->num_inputs = 1; + if (node_type & 0x20007) { + if (dev->num_inputs < 1) + dev->num_inputs = 1; + } else { + dev->num_inputs = 0; + } if (dev->num_inputs >= MAX_INPUTS) dev->num_inputs = MAX_INPUTS; for (i = 0; i < dev->num_inputs; i++) { @@ -890,8 +894,12 @@ static int vivid_detect_feature_set(struct vivid_dev *dev, int inst, /* how many outputs do we have and of what type? */ dev->num_outputs = num_outputs[inst]; - if (dev->num_outputs < 1) - dev->num_outputs = 1; + if (node_type & 0x40300) { + if (dev->num_outputs < 1) + dev->num_outputs = 1; + } else { + dev->num_outputs = 0; + } if (dev->num_outputs >= MAX_OUTPUTS) dev->num_outputs = MAX_OUTPUTS; for (i = 0; i < dev->num_outputs; i++) { From 44affe7ede596f078c4f2f41e0d160266ccda818 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Oct 2022 19:01:24 -0700 Subject: [PATCH 0538/1477] ipv6: ensure sane device mtu in tunnels [ Upstream commit d89d7ff01235f218dad37de84457717f699dee79 ] Another syzbot report [1] with no reproducer hints at a bug in ip6_gre tunnel (dev:ip6gretap0) Since ipv6 mcast code makes sure to read dev->mtu once and applies a sanity check on it (see commit b9b312a7a451 "ipv6: mcast: better catch silly mtu values"), a remaining possibility is that a layer is able to set dev->mtu to an underflowed value (high order bit set). This could happen indeed in ip6gre_tnl_link_config_route(), ip6_tnl_link_config() and ipip6_tunnel_bind_dev() Make sure to sanitize mtu value in a local variable before it is written once on dev->mtu, as lockless readers could catch wrong temporary value. 
[1] skbuff: skb_over_panic: text:ffff80000b7a2f38 len:40 put:40 head:ffff000149dcf200 data:ffff000149dcf2b0 tail:0xd8 end:0xc0 dev:ip6gretap0 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:120 Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 1 PID: 10241 Comm: kworker/1:1 Not tainted 6.0.0-rc7-syzkaller-18095-gbbed346d5a96 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/30/2022 Workqueue: mld mld_ifc_work pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_panic+0x4c/0x50 net/core/skbuff.c:116 lr : skb_panic+0x4c/0x50 net/core/skbuff.c:116 sp : ffff800020dd3b60 x29: ffff800020dd3b70 x28: 0000000000000000 x27: ffff00010df2a800 x26: 00000000000000c0 x25: 00000000000000b0 x24: ffff000149dcf200 x23: 00000000000000c0 x22: 00000000000000d8 x21: ffff80000b7a2f38 x20: ffff00014c2f7800 x19: 0000000000000028 x18: 00000000000001a9 x17: 0000000000000000 x16: ffff80000db49158 x15: ffff000113bf1a80 x14: 0000000000000000 x13: 00000000ffffffff x12: ffff000113bf1a80 x11: ff808000081c0d5c x10: 0000000000000000 x9 : 73f125dc5c63ba00 x8 : 73f125dc5c63ba00 x7 : ffff800008161d1c x6 : 0000000000000000 x5 : 0000000000000080 x4 : 0000000000000001 x3 : 0000000000000000 x2 : ffff0001fefddcd0 x1 : 0000000100000000 x0 : 0000000000000089 Call trace: skb_panic+0x4c/0x50 net/core/skbuff.c:116 skb_over_panic net/core/skbuff.c:125 [inline] skb_put+0xd4/0xdc net/core/skbuff.c:2049 ip6_mc_hdr net/ipv6/mcast.c:1714 [inline] mld_newpack+0x14c/0x270 net/ipv6/mcast.c:1765 add_grhead net/ipv6/mcast.c:1851 [inline] add_grec+0xa20/0xae0 net/ipv6/mcast.c:1989 mld_send_cr+0x438/0x5a8 net/ipv6/mcast.c:2115 mld_ifc_work+0x38/0x290 net/ipv6/mcast.c:2653 process_one_work+0x2d8/0x504 kernel/workqueue.c:2289 worker_thread+0x340/0x610 kernel/workqueue.c:2436 kthread+0x12c/0x158 kernel/kthread.c:376 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:860 Code: 91011400 aa0803e1 a90027ea 94373093 
(d4210000) Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Reported-by: syzbot Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20221024020124.3756833-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/ip6_gre.c | 16 +++++++++------- net/ipv6/ip6_tunnel.c | 11 ++++++----- net/ipv6/sit.c | 8 +++++--- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 9e0890738d93..0010f9e54f13 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1153,14 +1153,16 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, dev->needed_headroom = dst_len; if (set_mtu) { - dev->mtu = rt->dst.dev->mtu - t_hlen; - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; - if (dev->type == ARPHRD_ETHER) - dev->mtu -= ETH_HLEN; + int mtu = rt->dst.dev->mtu - t_hlen; - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + mtu -= 8; + if (dev->type == ARPHRD_ETHER) + mtu -= ETH_HLEN; + + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); } } ip6_rt_put(rt); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3a2741569b84..0d4cab94c5dd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1476,8 +1476,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) struct net_device *tdev = NULL; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; - unsigned int mtu; int t_hlen; + int mtu; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); @@ -1524,12 +1524,13 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) dev->hard_header_len = tdev->hard_header_len + t_hlen; mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU); - dev->mtu = mtu - t_hlen; + mtu = mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; + mtu -= 8; - if (dev->mtu < 
IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); } } } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3c92e8cacbba..1ce486a9bc07 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1123,10 +1123,12 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int mtu; - dev->mtu = tdev->mtu - t_hlen; - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + mtu = tdev->mtu - t_hlen; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); } } From 23d5599058a060793a862b47d29edc87364e50f9 Mon Sep 17 00:00:00 2001 From: Slawomir Laba Date: Mon, 24 Oct 2022 03:05:24 -0700 Subject: [PATCH 0539/1477] i40e: Fix ethtool rx-flow-hash setting for X722 [ Upstream commit 54b5af5a438076082d482cab105b1bd484ab5074 ] When enabling flow type for RSS hash via ethtool: ethtool -N $pf rx-flow-hash tcp4|tcp6|udp4|udp6 s|d the driver would fail to setup this setting on X722 device since it was using the mask on the register dedicated for X710 devices. Apply a different mask on the register when setting the RSS hash for the X722 device. When displaying the flow types enabled via ethtool: ethtool -n $pf rx-flow-hash tcp4|tcp6|udp4|udp6 the driver would print wrong values for X722 device. Fix this issue by testing masks for X722 device in i40e_get_rss_hash_opts function. 
Fixes: eb0dd6e4a3b3 ("i40e: Allow RSS Hash set with less than four parameters") Signed-off-by: Slawomir Laba Signed-off-by: Michal Jaron Signed-off-by: Mateusz Palczewski Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20221024100526.1874914-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 31 ++++++++++++++----- drivers/net/ethernet/intel/i40e/i40e_type.h | 4 +++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index cc5f5c237774..bcc22b374b4a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -3083,10 +3083,17 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd) if (cmd->flow_type == TCP_V4_FLOW || cmd->flow_type == UDP_V4_FLOW) { - if (i_set & I40E_L3_SRC_MASK) - cmd->data |= RXH_IP_SRC; - if (i_set & I40E_L3_DST_MASK) - cmd->data |= RXH_IP_DST; + if (hw->mac.type == I40E_MAC_X722) { + if (i_set & I40E_X722_L3_SRC_MASK) + cmd->data |= RXH_IP_SRC; + if (i_set & I40E_X722_L3_DST_MASK) + cmd->data |= RXH_IP_DST; + } else { + if (i_set & I40E_L3_SRC_MASK) + cmd->data |= RXH_IP_SRC; + if (i_set & I40E_L3_DST_MASK) + cmd->data |= RXH_IP_DST; + } } else if (cmd->flow_type == TCP_V6_FLOW || cmd->flow_type == UDP_V6_FLOW) { if (i_set & I40E_L3_V6_SRC_MASK) @@ -3393,12 +3400,15 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, /** * i40e_get_rss_hash_bits - Read RSS Hash bits from register + * @hw: hw structure * @nfc: pointer to user request * @i_setc: bits currently set * * Returns value of bits to be set per user request **/ -static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc) +static u64 i40e_get_rss_hash_bits(struct i40e_hw *hw, + struct ethtool_rxnfc *nfc, + u64 
i_setc) { u64 i_set = i_setc; u64 src_l3 = 0, dst_l3 = 0; @@ -3417,8 +3427,13 @@ static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc) dst_l3 = I40E_L3_V6_DST_MASK; } else if (nfc->flow_type == TCP_V4_FLOW || nfc->flow_type == UDP_V4_FLOW) { - src_l3 = I40E_L3_SRC_MASK; - dst_l3 = I40E_L3_DST_MASK; + if (hw->mac.type == I40E_MAC_X722) { + src_l3 = I40E_X722_L3_SRC_MASK; + dst_l3 = I40E_X722_L3_DST_MASK; + } else { + src_l3 = I40E_L3_SRC_MASK; + dst_l3 = I40E_L3_DST_MASK; + } } else { /* Any other flow type are not supported here */ return i_set; @@ -3533,7 +3548,7 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) flow_pctype)) | ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_pctype)) << 32); - i_set = i40e_get_rss_hash_bits(nfc, i_setc); + i_set = i40e_get_rss_hash_bits(&pf->hw, nfc, i_setc); i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_pctype), (u32)i_set); i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_pctype), diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 446672a7e39f..0872448c0e80 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -1404,6 +1404,10 @@ struct i40e_lldp_variables { #define I40E_PFQF_CTL_0_HASHLUTSIZE_512 0x00010000 /* INPUT SET MASK for RSS, flow director, and flexible payload */ +#define I40E_X722_L3_SRC_SHIFT 49 +#define I40E_X722_L3_SRC_MASK (0x3ULL << I40E_X722_L3_SRC_SHIFT) +#define I40E_X722_L3_DST_SHIFT 41 +#define I40E_X722_L3_DST_MASK (0x3ULL << I40E_X722_L3_DST_SHIFT) #define I40E_L3_SRC_SHIFT 47 #define I40E_L3_SRC_MASK (0x3ULL << I40E_L3_SRC_SHIFT) #define I40E_L3_V6_SRC_SHIFT 43 From 9abae363af5ced6adbf04c14366289540281fb26 Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Mon, 24 Oct 2022 03:05:25 -0700 Subject: [PATCH 0540/1477] i40e: Fix VF hang when reset is triggered on another VF [ Upstream commit 
52424f974bc53c26ba3f00300a00e9de9afcd972 ] When a reset was triggered on one VF with i40e_reset_vf global PF state __I40E_VF_DISABLE was set on a PF until the reset finished. If immediately after triggering reset on one VF there is a request to reset on another it will cause a hang on VF side because VF will be notified of incoming reset but the reset will never happen because of this global state, we will get such error message: [ +4.890195] iavf 0000:86:02.1: Never saw reset and VF will hang waiting for the reset to be triggered. Fix this by introducing new VF state I40E_VF_STATE_RESETTING that will be set on a VF if it is currently resetting instead of the global __I40E_VF_DISABLE PF state. Fixes: 3ba9bcb4b68f ("i40e: add locking around VF reset") Signed-off-by: Sylwester Dziedziuch Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20221024100526.1874914-2-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 43 ++++++++++++++----- .../ethernet/intel/i40e/i40e_virtchnl_pf.h | 1 + 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index ffff7de801af..381b28a08746 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1483,10 +1483,12 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state)) return true; - /* If the VFs have been disabled, this means something else is - * resetting the VF, so we shouldn't continue. - */ - if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) + /* Bail out if VFs are disabled. */ + if (test_bit(__I40E_VF_DISABLE, pf->state)) + return true; + + /* If VF is being reset already we don't need to continue. 
*/ + if (test_and_set_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) return true; i40e_trigger_vf_reset(vf, flr); @@ -1523,7 +1525,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) i40e_cleanup_reset_vf(vf); i40e_flush(hw); - clear_bit(__I40E_VF_DISABLE, pf->state); + clear_bit(I40E_VF_STATE_RESETTING, &vf->vf_states); return true; } @@ -1556,8 +1558,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) return false; /* Begin reset on all VFs at once */ - for (v = 0; v < pf->num_alloc_vfs; v++) - i40e_trigger_vf_reset(&pf->vf[v], flr); + for (v = 0; v < pf->num_alloc_vfs; v++) { + vf = &pf->vf[v]; + /* If VF is being reset no need to trigger reset again */ + if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) + i40e_trigger_vf_reset(&pf->vf[v], flr); + } /* HW requires some time to make sure it can flush the FIFO for a VF * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in @@ -1573,9 +1579,11 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) */ while (v < pf->num_alloc_vfs) { vf = &pf->vf[v]; - reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); - if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) - break; + if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) { + reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); + if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) + break; + } /* If the current VF has finished resetting, move on * to the next VF in sequence. 
@@ -1603,6 +1611,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) if (pf->vf[v].lan_vsi_idx == 0) continue; + /* If VF is reset in another thread just continue */ + if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) + continue; + i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]); } @@ -1614,6 +1626,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) if (pf->vf[v].lan_vsi_idx == 0) continue; + /* If VF is reset in another thread just continue */ + if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) + continue; + i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]); } @@ -1623,8 +1639,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) mdelay(50); /* Finish the reset on each VF */ - for (v = 0; v < pf->num_alloc_vfs; v++) + for (v = 0; v < pf->num_alloc_vfs; v++) { + /* If VF is reset in another thread just continue */ + if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) + continue; + i40e_cleanup_reset_vf(&pf->vf[v]); + } i40e_flush(hw); clear_bit(__I40E_VF_DISABLE, pf->state); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index a554d0a0b09b..358bbdb58795 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -39,6 +39,7 @@ enum i40e_vf_states { I40E_VF_STATE_MC_PROMISC, I40E_VF_STATE_UC_PROMISC, I40E_VF_STATE_PRE_ENABLE, + I40E_VF_STATE_RESETTING }; /* VF capabilities */ From 6170b4579f36452e3e4ab5d119266053a977c378 Mon Sep 17 00:00:00 2001 From: Slawomir Laba Date: Mon, 24 Oct 2022 03:05:26 -0700 Subject: [PATCH 0541/1477] i40e: Fix flow-type by setting GL_HASH_INSET registers [ Upstream commit 3b32c9932853e11d71f9db012d69e92e4669ba23 ] Fix setting bits for specific flow_type for GLQF_HASH_INSET register. In previous version all of the bits were set only in hena register, while in inset only one bit was set. 
In order for this working correctly on all types of cards these bits needs to be set correctly for both hena and inset registers. Fixes: eb0dd6e4a3b3 ("i40e: Allow RSS Hash set with less than four parameters") Signed-off-by: Slawomir Laba Signed-off-by: Michal Jaron Signed-off-by: Mateusz Palczewski Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20221024100526.1874914-3-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 71 ++++++++++--------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index bcc22b374b4a..144c4824b5e8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -3451,6 +3451,7 @@ static u64 i40e_get_rss_hash_bits(struct i40e_hw *hw, return i_set; } +#define FLOW_PCTYPES_SIZE 64 /** * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash * @pf: pointer to the physical function struct @@ -3463,9 +3464,11 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) struct i40e_hw *hw = &pf->hw; u64 hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); - u8 flow_pctype = 0; + DECLARE_BITMAP(flow_pctypes, FLOW_PCTYPES_SIZE); u64 i_set, i_setc; + bitmap_zero(flow_pctypes, FLOW_PCTYPES_SIZE); + if (pf->flags & I40E_FLAG_MFP_ENABLED) { dev_err(&pf->pdev->dev, "Change of RSS hash input set is not supported when MFP mode is enabled\n"); @@ -3481,36 +3484,35 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) switch (nfc->flow_type) { case TCP_V4_FLOW: - flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; + set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP, flow_pctypes); if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); + 
set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK, + flow_pctypes); break; case TCP_V6_FLOW: - flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP; + set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP, flow_pctypes); if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); + set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK, + flow_pctypes); break; case UDP_V4_FLOW: - flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); - + set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_UDP, flow_pctypes); + if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) { + set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP, + flow_pctypes); + set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP, + flow_pctypes); + } hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4); break; case UDP_V6_FLOW: - flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP; - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); - + set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_UDP, flow_pctypes); + if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) { + set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP, + flow_pctypes); + set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP, + flow_pctypes); + } hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6); break; case AH_ESP_V4_FLOW: @@ -3543,17 +3545,20 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) return -EINVAL; } - if (flow_pctype) { - i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, - flow_pctype)) | - ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, - flow_pctype)) << 32); - 
i_set = i40e_get_rss_hash_bits(&pf->hw, nfc, i_setc); - i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_pctype), - (u32)i_set); - i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_pctype), - (u32)(i_set >> 32)); - hena |= BIT_ULL(flow_pctype); + if (bitmap_weight(flow_pctypes, FLOW_PCTYPES_SIZE)) { + u8 flow_id; + + for_each_set_bit(flow_id, flow_pctypes, FLOW_PCTYPES_SIZE) { + i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id)) | + ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id)) << 32); + i_set = i40e_get_rss_hash_bits(&pf->hw, nfc, i_setc); + + i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id), + (u32)i_set); + i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id), + (u32)(i_set >> 32)); + hena |= BIT_ULL(flow_id); + } } i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); From 1f262d80882ac344ca024523b478b035b5ab3bce Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 24 Oct 2022 21:13:38 +0800 Subject: [PATCH 0542/1477] net: ksz884x: fix missing pci_disable_device() on error in pcidev_init() [ Upstream commit 5da6d65590a0698199df44d095e54b0ed1708178 ] pci_disable_device() need be called while module exiting, switch to use pcim_enable(), pci_disable_device() will be called in pcim_release() while unbinding device. 
Fixes: 8ca86fd83eae ("net: Micrel KSZ8841/2 PCI Ethernet driver") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221024131338.2848959-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ksz884x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 9ed264ed7070..1fa16064142d 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -6923,7 +6923,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id) char banner[sizeof(version)]; struct ksz_switch *sw = NULL; - result = pci_enable_device(pdev); + result = pcim_enable_device(pdev); if (result) return result; From df67a8e625fce95b9bfcaad0b683586a95e1755b Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 25 Oct 2022 13:34:32 +0100 Subject: [PATCH 0543/1477] PM: domains: Fix handling of unavailable/disabled idle states [ Upstream commit e0c57a5c70c13317238cb19a7ded0eab4a5f7de5 ] Platforms can provide the information about the availability of each idle state via the status flag. Platforms may have to disable one or more idle states for various reasons like broken firmware or other unmet dependencies. Fix handling of such unavailable/disabled idle states by ignoring them while parsing the states. Fixes: a3381e3a65cb ("PM / domains: Fix up domain-idle-states OF parsing") Signed-off-by: Sudeep Holla Reviewed-by: Ulf Hansson Signed-off-by: Rafael J.
Wysocki Signed-off-by: Sasha Levin --- drivers/base/power/domain.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 743268996336..d0ba5459ce0b 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2789,6 +2789,10 @@ static int genpd_iterate_idle_states(struct device_node *dn, np = it.node; if (!of_match_node(idle_state_match, np)) continue; + + if (!of_device_is_available(np)) + continue; + if (states) { ret = genpd_parse_state(&states[i], np); if (ret) { From 97262705c0cb067c0f9bd055edfe115f7f43913c Mon Sep 17 00:00:00 2001 From: Juergen Borleis Date: Mon, 24 Oct 2022 10:05:52 +0200 Subject: [PATCH 0544/1477] net: fec: limit register access on i.MX6UL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0a8b43b12dd78daa77a7dc007b92770d262a2714 ] Using 'ethtool -d […]' on an i.MX6UL leads to a kernel crash: Unhandled fault: external abort on non-linefetch (0x1008) at […] because this SoC has fewer registers in its FEC implementation than other i.MX6 variants. Thus, a run-time decision is required to avoid access to non-existing registers.
Fixes: a51d3ab50702 ("net: fec: use a more proper compatible string for i.MX6UL type device") Signed-off-by: Juergen Borleis Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221024080552.21004-1-jbe@pengutronix.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 46 ++++++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d8bdaf2e5365..e183caf38176 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2251,6 +2251,31 @@ static u32 fec_enet_register_offset[] = { IEEE_R_DROP, IEEE_R_FRAME_OK, IEEE_R_CRC, IEEE_R_ALIGN, IEEE_R_MACERR, IEEE_R_FDXFC, IEEE_R_OCTETS_OK }; +/* for i.MX6ul */ +static u32 fec_enet_register_offset_6ul[] = { + FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0, + FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL, + FEC_X_CNTRL, FEC_ADDR_LOW, FEC_ADDR_HIGH, FEC_OPD, FEC_TXIC0, FEC_RXIC0, + FEC_HASH_TABLE_HIGH, FEC_HASH_TABLE_LOW, FEC_GRP_HASH_TABLE_HIGH, + FEC_GRP_HASH_TABLE_LOW, FEC_X_WMRK, FEC_R_DES_START_0, + FEC_X_DES_START_0, FEC_R_BUFF_SIZE_0, FEC_R_FIFO_RSFL, FEC_R_FIFO_RSEM, + FEC_R_FIFO_RAEM, FEC_R_FIFO_RAFL, FEC_RACC, + RMON_T_DROP, RMON_T_PACKETS, RMON_T_BC_PKT, RMON_T_MC_PKT, + RMON_T_CRC_ALIGN, RMON_T_UNDERSIZE, RMON_T_OVERSIZE, RMON_T_FRAG, + RMON_T_JAB, RMON_T_COL, RMON_T_P64, RMON_T_P65TO127, RMON_T_P128TO255, + RMON_T_P256TO511, RMON_T_P512TO1023, RMON_T_P1024TO2047, + RMON_T_P_GTE2048, RMON_T_OCTETS, + IEEE_T_DROP, IEEE_T_FRAME_OK, IEEE_T_1COL, IEEE_T_MCOL, IEEE_T_DEF, + IEEE_T_LCOL, IEEE_T_EXCOL, IEEE_T_MACERR, IEEE_T_CSERR, IEEE_T_SQE, + IEEE_T_FDXFC, IEEE_T_OCTETS_OK, + RMON_R_PACKETS, RMON_R_BC_PKT, RMON_R_MC_PKT, RMON_R_CRC_ALIGN, + RMON_R_UNDERSIZE, RMON_R_OVERSIZE, RMON_R_FRAG, RMON_R_JAB, + RMON_R_RESVD_O, RMON_R_P64, RMON_R_P65TO127, RMON_R_P128TO255, + 
RMON_R_P256TO511, RMON_R_P512TO1023, RMON_R_P1024TO2047, + RMON_R_P_GTE2048, RMON_R_OCTETS, + IEEE_R_DROP, IEEE_R_FRAME_OK, IEEE_R_CRC, IEEE_R_ALIGN, IEEE_R_MACERR, + IEEE_R_FDXFC, IEEE_R_OCTETS_OK +}; #else static __u32 fec_enet_register_version = 1; static u32 fec_enet_register_offset[] = { @@ -2275,7 +2300,24 @@ static void fec_enet_get_regs(struct net_device *ndev, u32 *buf = (u32 *)regbuf; u32 i, off; int ret; +#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ + defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \ + defined(CONFIG_ARM64) || defined(CONFIG_COMPILE_TEST) + u32 *reg_list; + u32 reg_cnt; + if (!of_machine_is_compatible("fsl,imx6ul")) { + reg_list = fec_enet_register_offset; + reg_cnt = ARRAY_SIZE(fec_enet_register_offset); + } else { + reg_list = fec_enet_register_offset_6ul; + reg_cnt = ARRAY_SIZE(fec_enet_register_offset_6ul); + } +#else + /* coldfire */ + static u32 *reg_list = fec_enet_register_offset; + static const u32 reg_cnt = ARRAY_SIZE(fec_enet_register_offset); +#endif ret = pm_runtime_resume_and_get(dev); if (ret < 0) return; @@ -2284,8 +2326,8 @@ static void fec_enet_get_regs(struct net_device *ndev, memset(buf, 0, regs->len); - for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { - off = fec_enet_register_offset[i]; + for (i = 0; i < reg_cnt; i++) { + off = reg_list[i]; if ((off == FEC_R_BOUND || off == FEC_R_FSTART) && !(fep->quirks & FEC_QUIRK_HAS_FRREG)) From ce6fd1c382a38b75557db85a2fe99d285540a03d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 27 Oct 2022 09:34:38 +0800 Subject: [PATCH 0545/1477] ALSA: aoa: i2sbus: fix possible memory leak in i2sbus_add_dev() [ Upstream commit 4a4c8482e370d697738a78dcd7bf2780832cb712 ] dev_set_name() in soundbus_add_one() allocates memory for name, it need be freed when of_device_register() fails, call soundbus_dev_put() to give up the reference that hold in device_initialize(), so that it can be freed in kobject_cleanup() 
when the refcount hits 0. And other resources are also freed in i2sbus_release_dev(), so it can return 0 directly. Fixes: f3d9478b2ce4 ("[ALSA] snd-aoa: add snd-aoa") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221027013438.991920-1-yangyingliang@huawei.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/aoa/soundbus/i2sbus/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c index faf6b03131ee..f6841daf9e3b 100644 --- a/sound/aoa/soundbus/i2sbus/core.c +++ b/sound/aoa/soundbus/i2sbus/core.c @@ -302,6 +302,10 @@ static int i2sbus_add_dev(struct macio_dev *macio, if (soundbus_add_one(&dev->sound)) { printk(KERN_DEBUG "i2sbus: device registration error!\n"); + if (dev->sound.ofdev.dev.kobj.state_initialized) { + soundbus_dev_put(&dev->sound); + return 0; + } goto err; } From 00d6f33f6782e72c8753f30dea126317d6f97c8e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 27 Oct 2022 08:52:33 +0200 Subject: [PATCH 0546/1477] ALSA: aoa: Fix I2S device accounting [ Upstream commit f1fae475f10a26b7e34da4ff2e2f19b7feb3548e ] i2sbus_add_dev() is supposed to return the number of probed devices, i.e. either 1 or 0. However, i2sbus_add_dev() has one error-handling path that returns -ENODEV; this will screw up the running total accumulated in the caller, i2sbus_probe(). Change the return value to 0 and add a comment to make this clear to readers.
Fixes: f3d9478b2ce4 ("[ALSA] snd-aoa: add snd-aoa") Link: https://lore.kernel.org/r/20221027065233.13292-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/aoa/soundbus/i2sbus/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c index f6841daf9e3b..51ed2f34b276 100644 --- a/sound/aoa/soundbus/i2sbus/core.c +++ b/sound/aoa/soundbus/i2sbus/core.c @@ -147,6 +147,7 @@ static int i2sbus_get_and_fixup_rsrc(struct device_node *np, int index, return rc; } +/* Returns 1 if added, 0 for otherwise; don't return a negative value! */ /* FIXME: look at device node refcounting */ static int i2sbus_add_dev(struct macio_dev *macio, struct i2sbus_control *control, @@ -213,7 +214,7 @@ static int i2sbus_add_dev(struct macio_dev *macio, * either as the second one in that case is just a modem. */ if (!ok) { kfree(dev); - return -ENODEV; + return 0; } mutex_init(&dev->lock); From 79631daa5a5166c6aa4e25f447e8c08ee030637a Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Tue, 25 Oct 2022 06:50:17 -0400 Subject: [PATCH 0547/1477] openvswitch: switch from WARN to pr_warn [ Upstream commit fd954cc1919e35cb92f78671cab6e42d661945a3 ] As noted by Paolo Abeni, pr_warn doesn't generate any splat and can still preserve the warning to the user that feature downgrade occurred. We likely cannot introduce other kinds of checks / enforcement here because syzbot can generate different genl versions to the datapath. 
Reported-by: syzbot+31cde0bef4bbf8ba2d86@syzkaller.appspotmail.com Fixes: 44da5ae5fbea ("openvswitch: Drop user features if old user space attempted to create datapath") Cc: Thomas Graf Signed-off-by: Aaron Conole Acked-by: Ilya Maximets Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/openvswitch/datapath.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6b5c0abf7f1b..7ed97dc0b561 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1592,7 +1592,8 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, if (IS_ERR(dp)) return; - WARN(dp->user_features, "Dropping previously announced user features\n"); + pr_warn("%s: Dropping previously announced user features\n", + ovs_dp_name(dp)); dp->user_features = 0; } From 3519b5ddac2109892c93a6ada7a3ec82e40d2273 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 25 Oct 2022 21:00:11 +0800 Subject: [PATCH 0548/1477] net: ehea: fix possible memory leak in ehea_register_port() [ Upstream commit 0e7ce23a917a9cc83ca3c779fbba836bca3bcf1e ] If of_device_register() returns an error, the of node and the name allocated in dev_set_name() are leaked. Call put_device() to give up the reference that was set in device_initialize(), so that the of node is put in logical_port_release() and the name is freed in kobject_cleanup().
Fixes: 1acf2318dd13 ("ehea: dynamic add / remove port") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221025130011.1071357-1-yangyingliang@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ehea/ehea_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index f63066736425..28a5f8d73a61 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -2897,6 +2897,7 @@ static struct device *ehea_register_port(struct ehea_port *port, ret = of_device_register(&port->ofdev); if (ret) { pr_err("failed to register device. ret=%d\n", ret); + put_device(&port->ofdev.dev); goto out; } From 0d88359092ddc5c2ef51f11a8b8a0b07467f9f3c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 20 Oct 2022 12:09:52 +0200 Subject: [PATCH 0549/1477] nh: fix scope used to find saddr when adding non gw nh [ Upstream commit bac0f937c343d651874f83b265ca8f5070ed4f06 ] As explained by Julian, fib_nh_scope is related to fib_nh_gw4, but fib_info_update_nhc_saddr() needs the scope of the route, which is the scope "before" fib_nh_scope, ie fib_nh_scope - 1. This patch fixes the problem described in commit 747c14307214 ("ip: fix dflt addr selection for connected nexthop"). 
Fixes: 597cfe4fc339 ("nexthop: Add support for IPv4 nexthops") Link: https://lore.kernel.org/netdev/6c8a44ba-c2d5-cdf-c5c7-5baf97cba38@ssi.bg/ Signed-off-by: Nicolas Dichtel Reviewed-by: Julian Anastasov Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/nexthop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 2a17dc9413ae..7a0102a4b1de 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1346,7 +1346,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, if (!err) { nh->nh_flags = fib_nh->fib_nh_flags; fib_info_update_nhc_saddr(net, &fib_nh->nh_common, - fib_nh->fib_nh_scope); + !fib_nh->fib_nh_scope ? 0 : fib_nh->fib_nh_scope - 1); } else { fib_nh_release(net, fib_nh); } From 16376ba5cfd7ee4a1d250ab42dd8418250b0cd97 Mon Sep 17 00:00:00 2001 From: Hyong Youb Kim Date: Wed, 26 Oct 2022 14:51:39 +0100 Subject: [PATCH 0550/1477] net/mlx5e: Do not increment ESN when updating IPsec ESN state [ Upstream commit 888be6b279b7257b5f6e4c9527675bff0a335596 ] An offloaded SA stops receiving after about 2^32 + replay_window packets. For example, when SA reaches , all subsequent packets get dropped with SA-icv-failure (integrity_failed). To reproduce the bug: - ConnectX-6 Dx with crypto enabled (FW 22.30.1004) - ipsec.conf: nic-offload = yes replay-window = 32 esn = yes salifetime=24h - Run netperf for a long time to send more than 2^32 packets netperf -H -t TCP_STREAM -l 20000 When 2^32 + replay_window packets are received, the replay window moves from the 2nd half of subspace (overlap=1) to the 1st half (overlap=0). The driver then updates the 'esn' value in NIC (i.e. seq_hi) as follows. seq_hi = xfrm_replay_seqhi(seq_bottom) new esn in NIC = seq_hi + 1 The +1 increment is wrong, as seq_hi already contains the correct seq_hi. For example, when seq_hi=1, the driver actually tells NIC to use seq_hi=2 (esn). 
This incorrect esn value causes all subsequent packets to fail integrity checks (SA-icv-failure). So, do not increment. Fixes: cb01008390bb ("net/mlx5: IPSec, Add support for ESN") Signed-off-by: Hyong Youb Kim Acked-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20221026135153.154807-2-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 26f7fab109d9..d08bd22dc569 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -113,7 +113,6 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) struct xfrm_replay_state_esn *replay_esn; u32 seq_bottom = 0; u8 overlap; - u32 *esn; if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) { sa_entry->esn_state.trigger = 0; @@ -128,11 +127,9 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x, htonl(seq_bottom)); - esn = &sa_entry->esn_state.esn; sa_entry->esn_state.trigger = 1; if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { - ++(*esn); sa_entry->esn_state.overlap = 0; return true; } else if (unlikely(!overlap && From bbcc06933f35651294ea1e963757502312c2171f Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 26 Oct 2022 14:51:45 +0100 Subject: [PATCH 0551/1477] net/mlx5: Fix possible use-after-free in async command interface [ Upstream commit bacd22df95147ed673bec4692ab2d4d585935241 ] mlx5_cmd_cleanup_async_ctx should return only after all its callback handlers were completed. Before this patch, the below race between mlx5_cmd_cleanup_async_ctx and mlx5_cmd_exec_cb_handler was possible and lead to a use-after-free: 1. 
mlx5_cmd_cleanup_async_ctx is called while num_inflight is 2 (i.e. elevated by 1, a single inflight callback). 2. mlx5_cmd_cleanup_async_ctx decreases num_inflight to 1. 3. mlx5_cmd_exec_cb_handler is called, decreases num_inflight to 0 and is about to call wake_up(). 4. mlx5_cmd_cleanup_async_ctx calls wait_event, which returns immediately as the condition (num_inflight == 0) holds. 5. mlx5_cmd_cleanup_async_ctx returns. 6. The caller of mlx5_cmd_cleanup_async_ctx frees the mlx5_async_ctx object. 7. mlx5_cmd_exec_cb_handler goes on and calls wake_up() on the freed object. Fix it by syncing using a completion object. Mark it completed when num_inflight reaches 0. Trace: BUG: KASAN: use-after-free in do_raw_spin_lock+0x23d/0x270 Read of size 4 at addr ffff888139cd12f4 by task swapper/5/0 CPU: 5 PID: 0 Comm: swapper/5 Not tainted 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x7d print_report.cold+0x2d5/0x684 ? do_raw_spin_lock+0x23d/0x270 kasan_report+0xb1/0x1a0 ? do_raw_spin_lock+0x23d/0x270 do_raw_spin_lock+0x23d/0x270 ? rwlock_bug.part.0+0x90/0x90 ? __delete_object+0xb8/0x100 ? lock_downgrade+0x6e0/0x6e0 _raw_spin_lock_irqsave+0x43/0x60 ? __wake_up_common_lock+0xb9/0x140 __wake_up_common_lock+0xb9/0x140 ? __wake_up_common+0x650/0x650 ? destroy_tis_callback+0x53/0x70 [mlx5_core] ? kasan_set_track+0x21/0x30 ? destroy_tis_callback+0x53/0x70 [mlx5_core] ? kfree+0x1ba/0x520 ? do_raw_spin_unlock+0x54/0x220 mlx5_cmd_exec_cb_handler+0x136/0x1a0 [mlx5_core] ? mlx5_cmd_cleanup_async_ctx+0x220/0x220 [mlx5_core] ? mlx5_cmd_cleanup_async_ctx+0x220/0x220 [mlx5_core] mlx5_cmd_comp_handler+0x65a/0x12b0 [mlx5_core] ? dump_command+0xcc0/0xcc0 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x400/0x400 ? 
cmd_comp_notifier+0x7e/0xb0 [mlx5_core] cmd_comp_notifier+0x7e/0xb0 [mlx5_core] atomic_notifier_call_chain+0xd7/0x1d0 mlx5_eq_async_int+0x3ce/0xa20 [mlx5_core] atomic_notifier_call_chain+0xd7/0x1d0 ? irq_release+0x140/0x140 [mlx5_core] irq_int_handler+0x19/0x30 [mlx5_core] __handle_irq_event_percpu+0x1f2/0x620 handle_irq_event+0xb2/0x1d0 handle_edge_irq+0x21e/0xb00 __common_interrupt+0x79/0x1a0 common_interrupt+0x78/0xa0 asm_common_interrupt+0x22/0x40 RIP: 0010:default_idle+0x42/0x60 Code: c1 83 e0 07 48 c1 e9 03 83 c0 03 0f b6 14 11 38 d0 7c 04 84 d2 75 14 8b 05 eb 47 22 02 85 c0 7e 07 0f 00 2d e0 9f 48 00 fb f4 48 c7 c7 80 08 7f 85 e8 d1 d3 3e fe eb de 66 66 2e 0f 1f 84 00 RSP: 0018:ffff888100dbfdf0 EFLAGS: 00000242 RAX: 0000000000000001 RBX: ffffffff84ecbd48 RCX: 1ffffffff0afe110 RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffffffff835cc9bc RBP: 0000000000000005 R08: 0000000000000001 R09: ffff88881dec4ac3 R10: ffffed1103bd8958 R11: 0000017d0ca571c9 R12: 0000000000000005 R13: ffffffff84f024e0 R14: 0000000000000000 R15: dffffc0000000000 ? default_idle_call+0xcc/0x450 default_idle_call+0xec/0x450 do_idle+0x394/0x450 ? arch_cpu_idle_exit+0x40/0x40 ? do_idle+0x17/0x450 cpu_startup_entry+0x19/0x20 start_secondary+0x221/0x2b0 ? 
set_cpu_sibling_map+0x2070/0x2070 secondary_startup_64_no_verify+0xcd/0xdb Allocated by task 49502: kasan_save_stack+0x1e/0x40 __kasan_kmalloc+0x81/0xa0 kvmalloc_node+0x48/0xe0 mlx5e_bulk_async_init+0x35/0x110 [mlx5_core] mlx5e_tls_priv_tx_list_cleanup+0x84/0x3e0 [mlx5_core] mlx5e_ktls_cleanup_tx+0x38f/0x760 [mlx5_core] mlx5e_cleanup_nic_tx+0xa7/0x100 [mlx5_core] mlx5e_detach_netdev+0x1ca/0x2b0 [mlx5_core] mlx5e_suspend+0xdb/0x140 [mlx5_core] mlx5e_remove+0x89/0x190 [mlx5_core] auxiliary_bus_remove+0x52/0x70 device_release_driver_internal+0x40f/0x650 driver_detach+0xc1/0x180 bus_remove_driver+0x125/0x2f0 auxiliary_driver_unregister+0x16/0x50 mlx5e_cleanup+0x26/0x30 [mlx5_core] cleanup+0xc/0x4e [mlx5_core] __x64_sys_delete_module+0x2b5/0x450 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Freed by task 49502: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_set_free_info+0x20/0x30 ____kasan_slab_free+0x11d/0x1b0 kfree+0x1ba/0x520 mlx5e_tls_priv_tx_list_cleanup+0x2e7/0x3e0 [mlx5_core] mlx5e_ktls_cleanup_tx+0x38f/0x760 [mlx5_core] mlx5e_cleanup_nic_tx+0xa7/0x100 [mlx5_core] mlx5e_detach_netdev+0x1ca/0x2b0 [mlx5_core] mlx5e_suspend+0xdb/0x140 [mlx5_core] mlx5e_remove+0x89/0x190 [mlx5_core] auxiliary_bus_remove+0x52/0x70 device_release_driver_internal+0x40f/0x650 driver_detach+0xc1/0x180 bus_remove_driver+0x125/0x2f0 auxiliary_driver_unregister+0x16/0x50 mlx5e_cleanup+0x26/0x30 [mlx5_core] cleanup+0xc/0x4e [mlx5_core] __x64_sys_delete_module+0x2b5/0x450 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: e355477ed9e4 ("net/mlx5: Make mlx5_cmd_exec_cb() a safe API") Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20221026135153.154807-8-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 10 +++++----- include/linux/mlx5/driver.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 94426d29025e..6612b2c0be48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1853,7 +1853,7 @@ void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, ctx->dev = dev; /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ atomic_set(&ctx->num_inflight, 1); - init_waitqueue_head(&ctx->wait); + init_completion(&ctx->inflight_done); } EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); @@ -1867,8 +1867,8 @@ EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); */ void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) { - atomic_dec(&ctx->num_inflight); - wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); + if (!atomic_dec_and_test(&ctx->num_inflight)) + wait_for_completion(&ctx->inflight_done); } EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); @@ -1879,7 +1879,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work) work->user_callback(status, work); if (atomic_dec_and_test(&ctx->num_inflight)) - wake_up(&ctx->wait); + complete(&ctx->inflight_done); } int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, @@ -1895,7 +1895,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, ret = cmd_exec(ctx->dev, in, in_size, out, out_size, mlx5_cmd_exec_cb_handler, work, false); if (ret && atomic_dec_and_test(&ctx->num_inflight)) - wake_up(&ctx->wait); + complete(&ctx->inflight_done); return ret; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 41fbb4793394..ae88362216a4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -899,7 +899,7 @@ void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); struct mlx5_async_ctx { struct mlx5_core_dev *dev; atomic_t num_inflight; - struct wait_queue_head wait; + struct completion inflight_done; }; struct mlx5_async_work; From 
fdba224ab02873a2998acf9d11ae5ac9a1e35717 Mon Sep 17 00:00:00 2001 From: Suresh Devarakonda Date: Wed, 26 Oct 2022 14:51:49 +0100 Subject: [PATCH 0552/1477] net/mlx5: Fix crash during sync firmware reset [ Upstream commit aefb62a9988749703435e941704624949a80a2a9 ] When setting Bluefield to DPU NIC mode using mlxconfig tool + sync firmware reset flow, we run into scenario where the host was not eswitch manager at the time of mlx5 driver load but becomes eswitch manager after the sync firmware reset flow. This results in null pointer access of mpfs structure during mac filter add. This change prevents null pointer access but mpfs table entries will not be added. Fixes: 5ec697446f46 ("net/mlx5: Add support for devlink reload action fw activate") Signed-off-by: Suresh Devarakonda Reviewed-by: Moshe Shemesh Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20221026135153.154807-12-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 839a01da110f..8ff16318e32d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -122,7 +122,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) { struct mlx5_mpfs *mpfs = dev->priv.mpfs; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!mpfs) return; WARN_ON(!hlist_empty(mpfs->hash)); @@ -137,7 +137,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) int err = 0; u32 index; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!mpfs) return 0; mutex_lock(&mpfs->lock); @@ -185,7 +185,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) int err = 0; u32 index; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!mpfs) return 0; mutex_lock(&mpfs->lock); From 
9889ca7efa128916b52538110cf1bbb62055855a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 27 Oct 2022 21:29:25 +0300 Subject: [PATCH 0553/1477] net: enetc: survive memory pressure without crashing [ Upstream commit 84ce1ca3fe9e1249bf21176ff162200f1c4e5ed1 ] Under memory pressure, enetc_refill_rx_ring() may fail, and when called during the enetc_open() -> enetc_setup_rxbdr() procedure, this is not checked for. An extreme case of memory pressure will result in exactly zero buffers being allocated for the RX ring, and in such a case it is expected that hardware drops all RX packets due to lack of buffers. This does not happen, because the reset-default value of the consumer and producer index is 0, and this makes the ENETC think that all buffers have been initialized and that it owns them (when in reality none were). The hardware guide explains this best: | Configure the receive ring producer index register RBaPIR with a value | of 0. The producer index is initially configured by software but owned | by hardware after the ring has been enabled. Hardware increments the | index when a frame is received which may consume one or more BDs. | Hardware is not allowed to increment the producer index to match the | consumer index since it is used to indicate an empty condition. The ring | can hold at most RBLENR[LENGTH]-1 received BDs. | | Configure the receive ring consumer index register RBaCIR. The | consumer index is owned by software and updated during operation of the | of the BD ring by software, to indicate that any receive data occupied | in the BD has been processed and it has been prepared for new data. | - If consumer index and producer index are initialized to the same | value, it indicates that all BDs in the ring have been prepared and | hardware owns all of the entries. | - If consumer index is initialized to producer index plus N, it would | indicate N BDs have been prepared.
Note that hardware cannot start if | only a single buffer is prepared due to the restrictions described in | (2). | - Software may write consumer index to match producer index anytime | while the ring is operational to indicate all received BDs prior have | been processed and new BDs prepared for hardware. Normally, the value of rx_ring->rcir (consumer index) is brought in sync with the rx_ring->next_to_use software index, but this only happens if page allocation ever succeeded. When PI==CI==0, the hardware appears to receive frames and write them to DMA address 0x0 (?!), then set the READY bit in the BD. The enetc_clean_rx_ring() function (and its XDP derivative) is naturally not prepared to handle such a condition. It will attempt to process those frames using the rx_swbd structure associated with index i of the RX ring, but that structure is not fully initialized (enetc_new_page() does all of that). So what happens next is undefined behavior. To operate using no buffer, we must initialize the CI to PI + 1, which will block the hardware from advancing the CI any further, and drop everything. The issue was seen while adding support for zero-copy AF_XDP sockets, where buffer memory comes from user space, which can even decide to supply no buffers at all (example: "xdpsock --txonly"). However, the bug is present also with the network stack code, even though it would take a very determined person to trigger a page allocation failure at the perfect time (a series of ifup/ifdown under memory pressure should eventually reproduce it given enough retries). 
Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Signed-off-by: Vladimir Oltean Reviewed-by: Claudiu Manoil Link: https://lore.kernel.org/r/20221027182925.3256653-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/enetc/enetc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 4af253825957..ca62c72eb772 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1241,7 +1241,12 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE); + /* Also prepare the consumer index in case page allocation never + * succeeds. In that case, hardware will never advance producer index + * to match consumer index, and will drop all frames. + */ enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0); + enetc_rxbdr_wr(hw, idx, ENETC_RBCIR, 1); /* enable Rx ints by setting pkt thr to 1 */ enetc_rxbdr_wr(hw, idx, ENETC_RBICR0, ENETC_RBICR0_ICEN | 0x1); From 52a43b82006dc88f996bd06da5a3fcfef85220c8 Mon Sep 17 00:00:00 2001 From: D Scott Phillips Date: Mon, 10 Oct 2022 19:21:40 -0700 Subject: [PATCH 0554/1477] arm64: Add AMPERE1 to the Spectre-BHB affected list [ Upstream commit 0e5d5ae837c8ce04d2ddb874ec5f920118bd9d31 ] Per AmpereOne erratum AC03_CPU_12, "Branch history may allow control of speculative execution across software contexts," the AMPERE1 core needs the bhb clearing loop to mitigate Spectre-BHB, with a loop iteration count of 11. 
Signed-off-by: D Scott Phillips Link: https://lore.kernel.org/r/20221011022140.432370-1-scott@os.amperecomputing.com Reviewed-by: James Morse Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/include/asm/cputype.h | 4 ++++ arch/arm64/kernel/proton-pack.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 39f5c1672f48..457b6bb276bb 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -60,6 +60,7 @@ #define ARM_CPU_IMP_FUJITSU 0x46 #define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_IMP_APPLE 0x61 +#define ARM_CPU_IMP_AMPERE 0xC0 #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -112,6 +113,8 @@ #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 +#define AMPERE_CPU_PART_AMPERE1 0xAC3 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -151,6 +154,7 @@ #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) +#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 6ae53d8cd576..faa8a6bf2376 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -876,6 +876,10 @@ u8 spectre_bhb_loop_affected(int scope) MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), {}, }; + static const struct midr_range spectre_bhb_k11_list[] = { + 
MIDR_ALL_VERSIONS(MIDR_AMPERE1), + {}, + }; static const struct midr_range spectre_bhb_k8_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), @@ -886,6 +890,8 @@ u8 spectre_bhb_loop_affected(int scope) k = 32; else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) k = 24; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list)) + k = 11; else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) k = 8; From d523384766fd5492ab77f49b5e646fa756e5ab4f Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 1 Nov 2022 09:31:24 +0800 Subject: [PATCH 0555/1477] scsi: sd: Revert "scsi: sd: Remove a local variable" This reverts commit 84f7a9de0602704bbec774a6c7f7c8c4994bee9c. Because it introduces a problem that rq->__data_len is set to the wrong value. before the patch: 1) nr_bytes = rq->__data_len 2) rq->__data_len = sdp->sector_size 3) scsi_init_io() 4) rq->__data_len = nr_bytes after the patch: 1) rq->__data_len = sdp->sector_size 2) scsi_init_io() 3) rq->__data_len = rq->__data_len -> __data_len is wrong It will cause that io can only complete one segment each time, and the io will requeue in scsi_io_completion_action(), which will cause severe performance degradation. Scsi write same is removed in commit e383e16e84e9 ("scsi: sd: Remove WRITE_SAME support") from mainline, hence this patch is only needed for stable kernels. 
Fixes: 84f7a9de0602 ("scsi: sd: Remove a local variable") Signed-off-by: Yu Kuai Reviewed-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index bd068d3bb455..58f66176bcb2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1074,6 +1074,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) struct bio *bio = rq->bio; u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq)); u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); + unsigned int nr_bytes = blk_rq_bytes(rq); blk_status_t ret; if (sdkp->device->no_write_same) @@ -1110,7 +1111,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) */ rq->__data_len = sdp->sector_size; ret = scsi_alloc_sgtables(cmd); - rq->__data_len = blk_rq_bytes(rq); + rq->__data_len = nr_bytes; return ret; } From c911f03f8d444e623724fddd82b07a7e1af42338 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 10 Mar 2021 11:23:10 +0530 Subject: [PATCH 0556/1477] arm64/mm: Fix __enable_mmu() for new TGRAN range values commit 26f55386f964cefa92ab7ccbed68f1a313074215 upstream. As per ARM ARM DDI 0487G.a, when FEAT_LPA2 is implemented, ID_AA64MMFR0_EL1 might contain a range of values to describe supported translation granules (4K and 16K pages sizes in particular) instead of just enabled or disabled values. This changes __enable_mmu() function to handle complete acceptable range of values (depending on whether the field is signed or unsigned) now represented with ID_AA64MMFR0_TGRAN_SUPPORTED_[MIN..MAX] pair. While here, also fix similar situations in EFI stub and KVM as well. 
Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: James Morse Cc: Suzuki K Poulose Cc: Ard Biesheuvel Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-efi@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Marc Zyngier Signed-off-by: James Morse Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1615355590-21102-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon Signed-off-by: Zenghui Yu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/sysreg.h | 20 ++++++++++++++------ arch/arm64/kernel/head.S | 6 ++++-- arch/arm64/kvm/reset.c | 10 ++++++---- drivers/firmware/efi/libstub/arm64-stub.c | 2 +- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 1f2209ad2cca..763a745af6a8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -795,6 +795,11 @@ #define ID_AA64MMFR0_PARANGE_48 0x5 #define ID_AA64MMFR0_PARANGE_52 0x6 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX 0x7 + #ifdef CONFIG_ARM64_PA_BITS_52 #define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52 #else @@ -955,14 +960,17 @@ #define ID_PFR1_PROGMOD_SHIFT 0 #if defined(CONFIG_ARM64_4K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #elif defined(CONFIG_ARM64_16K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define 
ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0xF #elif defined(CONFIG_ARM64_64K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #endif #define MVFR2_FPMISC_SHIFT 4 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index f9119eea735e..e1c25fa3b8e6 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -797,8 +797,10 @@ SYM_FUNC_END(__secondary_too_slow) SYM_FUNC_START(__enable_mmu) mrs x2, ID_AA64MMFR0_EL1 ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED - b.ne __no_granule_support + cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN + b.lt __no_granule_support + cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX + b.gt __no_granule_support update_early_cpu_boot_status 0, x2, x3 adrp x2, idmap_pg_dir phys_to_ttbr x1, x1 diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 204c62debf06..6f85c1821c3f 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -397,16 +397,18 @@ int kvm_set_ipa_limit(void) } switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) { - default: - case 1: + case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE: kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n"); return -EINVAL; - case 0: + case ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT: kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n"); break; - case 2: + case ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN ... 
ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX: kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n"); break; + default: + kvm_err("Unsupported value for TGRAN_2, giving up\n"); + return -EINVAL; } kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange); diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 415a971e7694..7f4bafcd9d33 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -24,7 +24,7 @@ efi_status_t check_platform_features(void) return EFI_SUCCESS; tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf; - if (tg != ID_AA64MMFR0_TGRAN_SUPPORTED) { + if (tg < ID_AA64MMFR0_TGRAN_SUPPORTED_MIN || tg > ID_AA64MMFR0_TGRAN_SUPPORTED_MAX) { if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) efi_err("This 64 KB granular kernel is not supported by your CPU\n"); else From d5924531dd8ad012ad13eb4d6a5e120c3dadfc05 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 14 Jul 2021 10:16:15 +0530 Subject: [PATCH 0557/1477] arm64/kexec: Test page size support with new TGRAN range values commit 79d82cbcbb3d2a56c009ad6a6df92c5dee061dad upstream. The commit 26f55386f964 ("arm64/mm: Fix __enable_mmu() for new TGRAN range values") had already switched into testing ID_AA64MMFR0_TGRAN range values. This just changes system_supports_[4|16|64]kb_granule() helpers to perform similar range tests as well. While here, it standardizes page size specific supported min and max TGRAN values. 
Cc: Will Deacon Cc: James Morse Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1626237975-1909-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Zenghui Yu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 9 ++++++--- arch/arm64/include/asm/sysreg.h | 28 ++++++++++++++++------------ 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 423f9b40e4d9..31ba0ac7db63 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -648,7 +648,8 @@ static inline bool system_supports_4kb_granule(void) val = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN4_SHIFT); - return val == ID_AA64MMFR0_TGRAN4_SUPPORTED; + return (val >= ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN) && + (val <= ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX); } static inline bool system_supports_64kb_granule(void) @@ -660,7 +661,8 @@ static inline bool system_supports_64kb_granule(void) val = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN64_SHIFT); - return val == ID_AA64MMFR0_TGRAN64_SUPPORTED; + return (val >= ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN) && + (val <= ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX); } static inline bool system_supports_16kb_granule(void) @@ -672,7 +674,8 @@ static inline bool system_supports_16kb_granule(void) val = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN16_SHIFT); - return val == ID_AA64MMFR0_TGRAN16_SUPPORTED; + return (val >= ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN) && + (val <= ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX); } static inline bool system_supports_mixed_endian_el0(void) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 763a745af6a8..06755fad3830 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -786,12 +786,16 @@ 
#define ID_AA64MMFR0_ASID_SHIFT 4 #define ID_AA64MMFR0_PARANGE_SHIFT 0 -#define ID_AA64MMFR0_TGRAN4_NI 0xf -#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 -#define ID_AA64MMFR0_TGRAN64_NI 0xf -#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 -#define ID_AA64MMFR0_TGRAN16_NI 0x0 -#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN 0x1 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX 0xf + #define ID_AA64MMFR0_PARANGE_48 0x5 #define ID_AA64MMFR0_PARANGE_52 0x6 @@ -961,16 +965,16 @@ #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX #elif defined(CONFIG_ARM64_16K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0xF +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX #elif defined(CONFIG_ARM64_64K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX #endif #define MVFR2_FPMISC_SHIFT 4 From eb69c07eca22ffd8621d9de9378e4b3ce7965190 Mon Sep 
17 00:00:00 2001 From: Biju Das Date: Tue, 25 Oct 2022 16:56:55 +0100 Subject: [PATCH 0558/1477] can: rcar_canfd: rcar_canfd_handle_global_receive(): fix IRQ storm on global FIFO receive commit 702de2c21eed04c67cefaaedc248ef16e5f6b293 upstream. We are seeing an IRQ storm on the global receive IRQ line under heavy CAN bus load conditions with both CAN channels enabled. Conditions: The global receive IRQ line is shared between can0 and can1, either of the channels can trigger an interrupt while the other channel's IRQ line is disabled (RFIE). When a global receive IRQ interrupt occurs, we mask the interrupt in the IRQ handler. Clearing and unmasking of the interrupt is happening in rx_poll(). There is a race condition where rx_poll() unmasks the interrupt, but the next IRQ handler does not mask the IRQ due to NAPIF_STATE_MISSED flag (e.g.: can0 RX FIFO interrupt is disabled and can1 is triggering RX interrupt, the delay in rx_poll() processing results in setting NAPIF_STATE_MISSED flag) leading to an IRQ storm. This patch fixes the issue by checking IRQ active and enabled before handling the IRQ on a particular channel.
Fixes: dd3bd23eb438 ("can: rcar_canfd: Add Renesas R-Car CAN FD driver") Suggested-by: Marc Kleine-Budde Signed-off-by: Biju Das Link: https://lore.kernel.org/all/20221025155657.1426948-2-biju.das.jz@bp.renesas.com Cc: stable@vger.kernel.org [mkl: adjust commit message] Signed-off-by: Marc Kleine-Budde [biju: removed gpriv from RCANFD_RFCC_RFIE macro] Signed-off-by: Biju Das Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/rcar/rcar_canfd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index 67f0f14e2bf4..c61534a2a2d3 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -1075,7 +1075,7 @@ static irqreturn_t rcar_canfd_global_interrupt(int irq, void *dev_id) struct rcar_canfd_global *gpriv = dev_id; struct net_device *ndev; struct rcar_canfd_channel *priv; - u32 sts, gerfl; + u32 sts, cc, gerfl; u32 ch, ridx; /* Global error interrupts still indicate a condition specific @@ -1093,7 +1093,9 @@ static irqreturn_t rcar_canfd_global_interrupt(int irq, void *dev_id) /* Handle Rx interrupts */ sts = rcar_canfd_read(priv->base, RCANFD_RFSTS(ridx)); - if (likely(sts & RCANFD_RFSTS_RFIF)) { + cc = rcar_canfd_read(priv->base, RCANFD_RFCC(ridx)); + if (likely(sts & RCANFD_RFSTS_RFIF && + cc & RCANFD_RFCC_RFIE)) { if (napi_schedule_prep(&priv->napi)) { /* Disable Rx FIFO interrupts */ rcar_canfd_clear_bit(priv->base, From 4a230f65d6a80c6658860c899f14f1d0bd0ca65b Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Sun, 10 Apr 2022 12:46:34 +0200 Subject: [PATCH 0559/1477] serial: core: move RS485 configuration tasks from drivers into core commit 0ed12afa5655512ee418047fb3546d229df20aa1 upstream. 
Several drivers that support setting the RS485 configuration via userspace implement one or more of the following tasks: - in case of an invalid RTS configuration (both RTS after send and RTS on send set or both unset) fall back to enable RTS on send and disable RTS after send - nullify the padding field of the returned serial_rs485 struct - copy the configuration into the uart port struct - limit RTS delays to 100 ms Move these tasks into the serial core to make them generic and to provide a consistent behaviour among all drivers. Signed-off-by: Lino Sanfilippo Link: https://lore.kernel.org/r/20220410104642.32195-2-LinoSanfilippo@gmx.de Signed-off-by: Daisuke Mizobuchi Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index b578f7090b63..6cc909d44a81 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -42,6 +42,11 @@ static struct lock_class_key port_lock_key; #define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8) +/* + * Max time with active RTS before/after data is sent. 
+ */ +#define RS485_MAX_RTS_DELAY 100 /* msecs */ + static void uart_change_speed(struct tty_struct *tty, struct uart_state *state, struct ktermios *old_termios); static void uart_wait_until_sent(struct tty_struct *tty, int timeout); @@ -1326,8 +1331,36 @@ static int uart_set_rs485_config(struct uart_port *port, if (copy_from_user(&rs485, rs485_user, sizeof(*rs485_user))) return -EFAULT; + /* pick sane settings if the user hasn't */ + if (!(rs485.flags & SER_RS485_RTS_ON_SEND) == + !(rs485.flags & SER_RS485_RTS_AFTER_SEND)) { + dev_warn_ratelimited(port->dev, + "%s (%d): invalid RTS setting, using RTS_ON_SEND instead\n", + port->name, port->line); + rs485.flags |= SER_RS485_RTS_ON_SEND; + rs485.flags &= ~SER_RS485_RTS_AFTER_SEND; + } + + if (rs485.delay_rts_before_send > RS485_MAX_RTS_DELAY) { + rs485.delay_rts_before_send = RS485_MAX_RTS_DELAY; + dev_warn_ratelimited(port->dev, + "%s (%d): RTS delay before sending clamped to %u ms\n", + port->name, port->line, rs485.delay_rts_before_send); + } + + if (rs485.delay_rts_after_send > RS485_MAX_RTS_DELAY) { + rs485.delay_rts_after_send = RS485_MAX_RTS_DELAY; + dev_warn_ratelimited(port->dev, + "%s (%d): RTS delay after sending clamped to %u ms\n", + port->name, port->line, rs485.delay_rts_after_send); + } + /* Return clean padding area to userspace */ + memset(rs485.padding, 0, sizeof(rs485.padding)); + spin_lock_irqsave(&port->lock, flags); ret = port->rs485_config(port, &rs485); + if (!ret) + port->rs485 = rs485; spin_unlock_irqrestore(&port->lock, flags); if (ret) return ret; From 26a2b9c468de495902fb914c050b4e8611764b2a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 22 Sep 2022 18:27:33 +0200 Subject: [PATCH 0560/1477] serial: Deassert Transmit Enable on probe in driver-specific way MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7c7f9bc986e698873b489c371a08f206979d06b7 upstream. 
When a UART port is newly registered, uart_configure_port() seeks to deassert RS485 Transmit Enable by setting the RTS bit in port->mctrl. However a number of UART drivers interpret a set RTS bit as *assertion* instead of deassertion: Affected drivers include those using serial8250_em485_config() (except 8250_bcm2835aux.c) and some using mctrl_gpio (e.g. imx.c). Since the interpretation of the RTS bit is driver-specific, it is not suitable as a means to centrally deassert Transmit Enable in the serial core. Instead, the serial core must call on drivers to deassert it in their driver-specific way. One way to achieve that is to call ->rs485_config(). It implicitly deasserts Transmit Enable. So amend uart_configure_port() and uart_resume_port() to invoke uart_rs485_config(). That allows removing calls to uart_rs485_config() from drivers' ->probe() hooks and declaring the function static. Skip any invocation of ->set_mctrl() if RS485 is enabled. RS485 has no hardware flow control, so the modem control lines are irrelevant and need not be touched. When leaving RS485 mode, reset the modem control lines to the state stored in port->mctrl. That way, UARTs which are muxed between RS485 and RS232 transceivers drive the lines correctly when switched to RS232. (serial8250_do_startup() historically raises the OUT1 modem signal because otherwise interrupts are not signaled on ancient PC UARTs, but I believe that no longer applies to modern, RS485-capable UARTs and is thus safe to be skipped.) imx.c modifies port->mctrl whenever Transmit Enable is asserted and deasserted. Stop it from doing that so port->mctrl reflects the RS232 line state. 8250_omap.c deasserts Transmit Enable on ->runtime_resume() by calling ->set_mctrl(). Because that is now a no-op in RS485 mode, amend the function to call serial8250_em485_stop_tx(). fsl_lpuart.c retrieves and applies the RS485 device tree properties after registering the UART port. 
Because applying now happens on registration in uart_configure_port(), move retrieval of the properties ahead of uart_add_one_port(). Link: https://lore.kernel.org/all/20220329085050.311408-1-matthias.schiffer@ew.tq-group.com/ Link: https://lore.kernel.org/all/8f538a8903795f22f9acc94a9a31b03c9c4ccacb.camel@ginzinger.com/ Fixes: d3b3404df318 ("serial: Fix incorrect rs485 polarity on uart open") Cc: stable@vger.kernel.org # v4.14+ Reported-by: Matthias Schiffer Reported-by: Roosen Henri Tested-by: Matthias Schiffer Reviewed-by: Ilpo Järvinen Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/2de36eba3fbe11278d5002e4e501afe0ceaca039.1663863805.git.lukas@wunner.de Signed-off-by: Daisuke Mizobuchi Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 3 +++ drivers/tty/serial/8250/8250_pci.c | 9 +-------- drivers/tty/serial/8250/8250_port.c | 12 +++++++----- drivers/tty/serial/fsl_lpuart.c | 8 +++----- drivers/tty/serial/imx.c | 8 ++------ drivers/tty/serial/serial_core.c | 30 ++++++++++++++++------------- 6 files changed, 33 insertions(+), 37 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 537bee8d2258..f3744ac805ec 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -342,6 +342,9 @@ static void omap8250_restore_regs(struct uart_8250_port *up) omap8250_update_mdr1(up, priv); up->port.ops->set_mctrl(&up->port, up->port.mctrl); + + if (up->port.rs485.flags & SER_RS485_ENABLED) + serial8250_em485_stop_tx(up); } /* diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index df10cc606582..b6656898699d 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1531,7 +1531,6 @@ static int pci_fintek_init(struct pci_dev *dev) resource_size_t bar_data[3]; u8 config_base; struct serial_private *priv = pci_get_drvdata(dev); - struct uart_8250_port *port; 
if (!(pci_resource_flags(dev, 5) & IORESOURCE_IO) || !(pci_resource_flags(dev, 4) & IORESOURCE_IO) || @@ -1578,13 +1577,7 @@ static int pci_fintek_init(struct pci_dev *dev) pci_write_config_byte(dev, config_base + 0x06, dev->irq); - if (priv) { - /* re-apply RS232/485 mode when - * pciserial_resume_ports() - */ - port = serial8250_get_port(priv->line[i]); - pci_fintek_rs485_config(&port->port, NULL); - } else { + if (!priv) { /* First init without port data * force init to RS232 Mode */ diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 71d143c00248..8b3756e4bb05 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -592,7 +592,7 @@ EXPORT_SYMBOL_GPL(serial8250_rpm_put); static int serial8250_em485_init(struct uart_8250_port *p) { if (p->em485) - return 0; + goto deassert_rts; p->em485 = kmalloc(sizeof(struct uart_8250_em485), GFP_ATOMIC); if (!p->em485) @@ -608,7 +608,9 @@ static int serial8250_em485_init(struct uart_8250_port *p) p->em485->active_timer = NULL; p->em485->tx_stopped = true; - p->rs485_stop_tx(p); +deassert_rts: + if (p->em485->tx_stopped) + p->rs485_stop_tx(p); return 0; } @@ -2030,6 +2032,9 @@ EXPORT_SYMBOL_GPL(serial8250_do_set_mctrl); static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl) { + if (port->rs485.flags & SER_RS485_ENABLED) + return; + if (port->set_mctrl) port->set_mctrl(port, mctrl); else @@ -3161,9 +3166,6 @@ static void serial8250_config_port(struct uart_port *port, int flags) if (flags & UART_CONFIG_TYPE) autoconfig(up); - if (port->rs485.flags & SER_RS485_ENABLED) - port->rs485_config(port, &port->rs485); - /* if access method is AU, it is a 16550 with a quirk */ if (port->type == PORT_16550A && port->iotype == UPIO_AU) up->bugs |= UART_BUG_NOMSR; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 269d1e3a025d..43aca5a2ef0f 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ 
b/drivers/tty/serial/fsl_lpuart.c @@ -2669,10 +2669,6 @@ static int lpuart_probe(struct platform_device *pdev) if (ret) goto failed_irq_request; - ret = uart_add_one_port(&lpuart_reg, &sport->port); - if (ret) - goto failed_attach_port; - ret = uart_get_rs485_mode(&sport->port); if (ret) goto failed_get_rs485; @@ -2684,7 +2680,9 @@ static int lpuart_probe(struct platform_device *pdev) sport->port.rs485.delay_rts_after_send) dev_err(&pdev->dev, "driver doesn't support RTS delays\n"); - sport->port.rs485_config(&sport->port, &sport->port.rs485); + ret = uart_add_one_port(&lpuart_reg, &sport->port); + if (ret) + goto failed_attach_port; return 0; diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index bfbca711bbf9..cf3d53165776 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -398,8 +398,7 @@ static void imx_uart_rts_active(struct imx_port *sport, u32 *ucr2) { *ucr2 &= ~(UCR2_CTSC | UCR2_CTS); - sport->port.mctrl |= TIOCM_RTS; - mctrl_gpio_set(sport->gpios, sport->port.mctrl); + mctrl_gpio_set(sport->gpios, sport->port.mctrl | TIOCM_RTS); } /* called with port.lock taken and irqs caller dependent */ @@ -408,8 +407,7 @@ static void imx_uart_rts_inactive(struct imx_port *sport, u32 *ucr2) *ucr2 &= ~UCR2_CTSC; *ucr2 |= UCR2_CTS; - sport->port.mctrl &= ~TIOCM_RTS; - mctrl_gpio_set(sport->gpios, sport->port.mctrl); + mctrl_gpio_set(sport->gpios, sport->port.mctrl & ~TIOCM_RTS); } static void start_hrtimer_ms(struct hrtimer *hrt, unsigned long msec) @@ -2381,8 +2379,6 @@ static int imx_uart_probe(struct platform_device *pdev) dev_err(&pdev->dev, "low-active RTS not possible when receiver is off, enabling receiver\n"); - imx_uart_rs485_config(&sport->port, &sport->port.rs485); - /* Disable interrupts before requesting them */ ucr1 = imx_uart_readl(sport, UCR1); ucr1 &= ~(UCR1_ADEN | UCR1_TRDYEN | UCR1_IDEN | UCR1_RRDYEN | UCR1_RTSDEN); diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 
6cc909d44a81..605f928f0636 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -149,15 +149,10 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) unsigned long flags; unsigned int old; - if (port->rs485.flags & SER_RS485_ENABLED) { - set &= ~TIOCM_RTS; - clear &= ~TIOCM_RTS; - } - spin_lock_irqsave(&port->lock, flags); old = port->mctrl; port->mctrl = (old & ~clear) | set; - if (old != port->mctrl) + if (old != port->mctrl && !(port->rs485.flags & SER_RS485_ENABLED)) port->ops->set_mctrl(port, port->mctrl); spin_unlock_irqrestore(&port->lock, flags); } @@ -1359,8 +1354,13 @@ static int uart_set_rs485_config(struct uart_port *port, spin_lock_irqsave(&port->lock, flags); ret = port->rs485_config(port, &rs485); - if (!ret) + if (!ret) { port->rs485 = rs485; + + /* Reset RTS and other mctrl lines when disabling RS485 */ + if (!(rs485.flags & SER_RS485_ENABLED)) + port->ops->set_mctrl(port, port->mctrl); + } spin_unlock_irqrestore(&port->lock, flags); if (ret) return ret; @@ -2335,7 +2335,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) uart_change_pm(state, UART_PM_STATE_ON); spin_lock_irq(&uport->lock); - ops->set_mctrl(uport, 0); + if (!(uport->rs485.flags & SER_RS485_ENABLED)) + ops->set_mctrl(uport, 0); spin_unlock_irq(&uport->lock); if (console_suspend_enabled || !uart_console(uport)) { /* Protected by port mutex for now */ @@ -2346,7 +2347,10 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) if (tty) uart_change_speed(tty, state, NULL); spin_lock_irq(&uport->lock); - ops->set_mctrl(uport, uport->mctrl); + if (!(uport->rs485.flags & SER_RS485_ENABLED)) + ops->set_mctrl(uport, uport->mctrl); + else + uport->rs485_config(uport, &uport->rs485); ops->start_tx(uport); spin_unlock_irq(&uport->lock); tty_port_set_initialized(port, 1); @@ -2444,10 +2448,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, */ 
spin_lock_irqsave(&port->lock, flags); port->mctrl &= TIOCM_DTR; - if (port->rs485.flags & SER_RS485_ENABLED && - !(port->rs485.flags & SER_RS485_RTS_AFTER_SEND)) - port->mctrl |= TIOCM_RTS; - port->ops->set_mctrl(port, port->mctrl); + if (!(port->rs485.flags & SER_RS485_ENABLED)) + port->ops->set_mctrl(port, port->mctrl); + else + port->rs485_config(port, &port->rs485); spin_unlock_irqrestore(&port->lock, flags); /* From 95aa34f72132ee42ee3f632a5540c84a5ee8624f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Nov 2022 23:57:54 +0900 Subject: [PATCH 0561/1477] Linux 5.10.153 Link: https://lore.kernel.org/r/20221102022055.039689234@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Hulk Robot Tested-by: Rudi Heitbaum Tested-by: Pavel Machek (CIP) Tested-by: Florian Fainelli Tested-by: Guenter Roeck Tested-by: Allen Pais Tested-by: Linux Kernel Functional Testing Tested-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a0750d051982..d1cd7539105d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 152 +SUBLEVEL = 153 EXTRAVERSION = NAME = Dare mighty things From e6da7808c955e609b3adf9d18b02f94463616016 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 21 Sep 2022 00:35:32 +0100 Subject: [PATCH 0562/1477] serial: 8250: Let drivers request full 16550A feature probing [ Upstream commit 9906890c89e4dbd900ed87ad3040080339a7f411 ] A SERIAL_8250_16550A_VARIANTS configuration option has been recently defined that lets one request the 8250 driver not to probe for 16550A device features so as to reduce the driver's device startup time in virtual machines. 
Some actual hardware devices require these features to have been fully determined however for their driver to work correctly, so define a flag to let drivers request full 16550A feature probing on a device-by-device basis if required regardless of the SERIAL_8250_16550A_VARIANTS option setting chosen. Fixes: dc56ecb81a0a ("serial: 8250: Support disabling mdelay-filled probes of 16550A variants") Cc: stable@vger.kernel.org # v5.6+ Reported-by: Anders Blomdell Signed-off-by: Maciej W. Rozycki Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209202357520.41633@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_port.c | 3 ++- include/linux/serial_core.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 8b3756e4bb05..f648fd1d7548 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1023,7 +1023,8 @@ static void autoconfig_16550a(struct uart_8250_port *up) up->port.type = PORT_16550A; up->capabilities |= UART_CAP_FIFO; - if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS)) + if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS) && + !(up->port.flags & UPF_FULL_PROBE)) return; /* diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 59a8caf3230a..6df4c3356ae6 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -100,7 +100,7 @@ struct uart_icount { __u32 buf_overrun; }; -typedef unsigned int __bitwise upf_t; +typedef u64 __bitwise upf_t; typedef unsigned int __bitwise upstat_t; struct uart_port { @@ -207,6 +207,7 @@ struct uart_port { #define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) #define UPF_DEAD ((__force upf_t) (1 << 30)) #define UPF_IOREMAP ((__force upf_t) (1 << 31)) +#define UPF_FULL_PROBE ((__force upf_t) (1ULL << 32)) #define __UPF_CHANGE_MASK 0x17fff #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) From 
028fcabd8a67196277f907c8ecce8e0bb99b276a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 5 Nov 2022 09:46:40 -0400 Subject: [PATCH 0563/1477] serial: ar933x: Deassert Transmit Enable on ->rs485_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3a939433ddc1bab98be028903aaa286e5e7461d7 upstream. The ar933x_uart driver neglects to deassert Transmit Enable when ->rs485_config() is invoked. Fix it. Fixes: 9be1064fe524 ("serial: ar933x_uart: add RS485 support") Cc: stable@vger.kernel.org # v5.7+ Cc: Daniel Golle Reviewed-by: Ilpo Järvinen Signed-off-by: Lukas Wunner Signed-off-by: Sasha Levin --- drivers/tty/serial/ar933x_uart.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index c2be7cf91399..fcbaff894193 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -593,6 +593,11 @@ static int ar933x_config_rs485(struct uart_port *port, dev_err(port->dev, "RS485 needs rts-gpio\n"); return 1; } + + if (rs485conf->flags & SER_RS485_ENABLED) + gpiod_set_value(up->rts_gpiod, + !!(rs485conf->flags & SER_RS485_RTS_AFTER_SEND)); + port->rs485 = *rs485conf; return 0; } From 523e1dd9f8d4d8cf064488473ef518c8dedcfeec Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 10:19:50 -0700 Subject: [PATCH 0564/1477] KVM: nVMX: Pull KVM L0's desired controls directly from vmcs01 [ Upstream commit 389ab25216c9d09e0d335e764eeeb84c2089614f ] When preparing controls for vmcs02, grab KVM's desired controls from vmcs01's shadow state instead of recalculating the controls from scratch, or in the secondary execution controls, instead of using the dedicated cache. Calculating secondary exec controls is eye-poppingly expensive due to the guest CPUID checks, hence the dedicated cache, but the other calculations aren't exactly free either.
Explicitly clear several bits (x2APIC, DESC exiting, and load EFER on exit) as appropriate as they may be set in vmcs01, whereas the previous implementation relied on dynamic bits being cleared in the calculator. Intentionally propagate VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL from vmcs01 to vmcs02. Whether or not PERF_GLOBAL_CTRL is loaded depends on whether or not perf itself is active, so unless perf stops between the exit from L1 and entry to L2, vmcs01 will hold the desired value. This is purely an optimization as atomic_switch_perf_msrs() will set/clear the control as needed at VM-Enter, i.e. it avoids two extra VMWRITEs in the case where perf is active (versus starting with the bits clear in vmcs02, which was the previous behavior). Cc: Zeng Guang Signed-off-by: Sean Christopherson Message-Id: <20210810171952.2758100-3-seanjc@google.com> Signed-off-by: Paolo Bonzini Stable-dep-of: def9d705c05e ("KVM: nVMX: Don't propagate vmcs12's PERF_GLOBAL_CTRL settings to vmcs02") Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx/nested.c | 25 ++++++++++++++++--------- arch/x86/kvm/vmx/vmx.h | 6 +++++- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7f15e2b2a0d6..2395387945a8 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2232,7 +2232,8 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, } } -static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) +static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01, + struct vmcs12 *vmcs12) { u32 exec_control, vmcs12_exec_ctrl; u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12); @@ -2243,7 +2244,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) /* * PIN CONTROLS */ - exec_control = vmx_pin_based_exec_ctrl(vmx); + exec_control = __pin_controls_get(vmcs01); exec_control |= (vmcs12->pin_based_vm_exec_control & ~PIN_BASED_VMX_PREEMPTION_TIMER); 
@@ -2258,7 +2259,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) /* * EXEC CONTROLS */ - exec_control = vmx_exec_control(vmx); /* L0's desires */ + exec_control = __exec_controls_get(vmcs01); /* L0's desires */ exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING; exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING; exec_control &= ~CPU_BASED_TPR_SHADOW; @@ -2295,17 +2296,20 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * SECONDARY EXEC CONTROLS */ if (cpu_has_secondary_exec_ctrls()) { - exec_control = vmx->secondary_exec_control; + exec_control = __secondary_exec_controls_get(vmcs01); /* Take the following fields only from vmcs12 */ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_ENABLE_RDTSCP | SECONDARY_EXEC_XSAVES | SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_ENABLE_VMFUNC); + SECONDARY_EXEC_ENABLE_VMFUNC | + SECONDARY_EXEC_DESC); + if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & @@ -2342,8 +2346,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * on the related bits (if supported by the CPU) in the hope that * we can avoid VMWrites during vmx_set_efer(). */ - exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) & - ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER; + exec_control = __vm_entry_controls_get(vmcs01); + exec_control |= vmcs12->vm_entry_controls; + exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER); if (cpu_has_load_ia32_efer()) { if (guest_efer & EFER_LMA) exec_control |= VM_ENTRY_IA32E_MODE; @@ -2359,9 +2364,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * we should use its exit controls. 
Note that VM_EXIT_LOAD_IA32_EFER * bits may be modified by vmx_set_efer() in prepare_vmcs02(). */ - exec_control = vmx_vmexit_ctrl(); + exec_control = __vm_exit_controls_get(vmcs01); if (cpu_has_load_ia32_efer() && guest_efer != host_efer) exec_control |= VM_EXIT_LOAD_IA32_EFER; + else + exec_control &= ~VM_EXIT_LOAD_IA32_EFER; vm_exit_controls_set(vmx, exec_control); /* @@ -3370,7 +3377,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); - prepare_vmcs02_early(vmx, vmcs12); + prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12); if (from_vmentry) { if (unlikely(!nested_get_vmcs12_pages(vcpu))) { diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 24903f05c204..ed4b6da83aa8 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -386,9 +386,13 @@ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \ vmx->loaded_vmcs->controls_shadow.lname = val; \ } \ } \ +static inline u32 __##lname##_controls_get(struct loaded_vmcs *vmcs) \ +{ \ + return vmcs->controls_shadow.lname; \ +} \ static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \ { \ - return vmx->loaded_vmcs->controls_shadow.lname; \ + return __##lname##_controls_get(vmx->loaded_vmcs); \ } \ static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \ { \ From 8364786152d5abf887162612c23f932d8cda164f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 30 Aug 2022 15:37:21 +0200 Subject: [PATCH 0565/1477] KVM: nVMX: Don't propagate vmcs12's PERF_GLOBAL_CTRL settings to vmcs02 [ Upstream commit def9d705c05eab3fdedeb10ad67907513b12038e ] Don't propagate vmcs12's VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL to vmcs02. KVM doesn't disallow L1 from using VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL even when KVM itself doesn't use the control, e.g. due to the various CPU errata where the MSR can be corrupted on VM-Exit.
Preserve KVM's (vmcs01) setting to hopefully avoid having to toggle the bit in vmcs02 at a later point. E.g. if KVM is loading PERF_GLOBAL_CTRL when running L1, then odds are good KVM will also load the MSR when running L2. Fixes: 8bf00a529967 ("KVM: VMX: add support for switching of PERF_GLOBAL_CTRL") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20220830133737.1539624-18-vkuznets@redhat.com Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx/nested.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2395387945a8..498fed0dda98 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2345,9 +2345,14 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate * on the related bits (if supported by the CPU) in the hope that * we can avoid VMWrites during vmx_set_efer(). + * + * Similarly, take vmcs01's PERF_GLOBAL_CTRL in the hope that if KVM is + * loading PERF_GLOBAL_CTRL via the VMCS for L1, then KVM will want to + * do the same for L2. */ exec_control = __vm_entry_controls_get(vmcs01); - exec_control |= vmcs12->vm_entry_controls; + exec_control |= (vmcs12->vm_entry_controls & + ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER); if (cpu_has_load_ia32_efer()) { if (guest_efer & EFER_LMA) From e5d7c6786befe4aca0ba645dc5b836fc60ea733c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 2 May 2022 00:07:31 +0200 Subject: [PATCH 0566/1477] KVM: x86: Trace re-injected exceptions [ Upstream commit a61d7c5432ac5a953bbcec17af031661c2bd201d ] Trace exceptions that are re-injected, not just those that KVM is injecting for the first time. 
Debugging re-injection bugs is painful enough as is, not having visibility into what KVM is doing only makes things worse. Delay propagating pending=>injected in the non-reinjection path so that the tracing can properly identify reinjected exceptions. Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Signed-off-by: Maciej S. Szmigiero Message-Id: <25470690a38b4d2b32b6204875dd35676c65c9f2.1651440202.git.maciej.szmigiero@oracle.com> Signed-off-by: Paolo Bonzini Stable-dep-of: 5623f751bd9c ("KVM: x86: Treat #DBs from the emulator as fault-like (code and DR7.GD=1)") Signed-off-by: Sasha Levin --- arch/x86/kvm/trace.h | 12 ++++++++---- arch/x86/kvm/x86.c | 16 +++++++++------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a2835d784f4b..3d4988ea8b57 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -304,25 +304,29 @@ TRACE_EVENT(kvm_inj_virq, * Tracepoint for kvm interrupt injection: */ TRACE_EVENT(kvm_inj_exception, - TP_PROTO(unsigned exception, bool has_error, unsigned error_code), - TP_ARGS(exception, has_error, error_code), + TP_PROTO(unsigned exception, bool has_error, unsigned error_code, + bool reinjected), + TP_ARGS(exception, has_error, error_code, reinjected), TP_STRUCT__entry( __field( u8, exception ) __field( u8, has_error ) __field( u32, error_code ) + __field( bool, reinjected ) ), TP_fast_assign( __entry->exception = exception; __entry->has_error = has_error; __entry->error_code = error_code; + __entry->reinjected = reinjected; ), - TP_printk("%s (0x%x)", + TP_printk("%s (0x%x)%s", __print_symbolic(__entry->exception, kvm_trace_sym_exc), /* FIXME: don't print error_code if not present */ - __entry->has_error ? __entry->error_code : 0) + __entry->has_error ? __entry->error_code : 0, + __entry->reinjected ? 
" [reinjected]" : "") ); /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f3473418dcd5..17bb3d0e2d13 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8347,6 +8347,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) static void kvm_inject_exception(struct kvm_vcpu *vcpu) { + trace_kvm_inj_exception(vcpu->arch.exception.nr, + vcpu->arch.exception.has_error_code, + vcpu->arch.exception.error_code, + vcpu->arch.exception.injected); + if (vcpu->arch.exception.error_code && !is_protmode(vcpu)) vcpu->arch.exception.error_code = false; kvm_x86_ops.queue_exception(vcpu); @@ -8404,13 +8409,6 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit /* try to inject new event if pending */ if (vcpu->arch.exception.pending) { - trace_kvm_inj_exception(vcpu->arch.exception.nr, - vcpu->arch.exception.has_error_code, - vcpu->arch.exception.error_code); - - vcpu->arch.exception.pending = false; - vcpu->arch.exception.injected = true; - if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | X86_EFLAGS_RF); @@ -8424,6 +8422,10 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit } kvm_inject_exception(vcpu); + + vcpu->arch.exception.pending = false; + vcpu->arch.exception.injected = true; + can_inject = false; } From 570fa3bcd2f9328efc2123f8c46a50ea6b64b004 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 30 Aug 2022 23:15:55 +0000 Subject: [PATCH 0567/1477] KVM: x86: Treat #DBs from the emulator as fault-like (code and DR7.GD=1) [ Upstream commit 5623f751bd9c438ed12840e086f33c4646440d19 ] Add a dedicated "exception type" for #DBs, as #DBs can be fault-like or trap-like depending the sub-type of #DB, and effectively defer the decision of what to do with the #DB to the caller. 
For the emulator's two calls to exception_type(), treat the #DB as fault-like, as the emulator handles only code breakpoint and general detect #DBs, both of which are fault-like. For event injection, which uses exception_type() to determine whether to set EFLAGS.RF=1 on the stack, keep the current behavior of not setting RF=1 for #DBs. Intel and AMD explicitly state RF isn't set on code #DBs, so exempting by failing the "== EXCPT_FAULT" check is correct. The only other fault-like #DB is General Detect, and despite Intel and AMD both strongly implying (through omission) that General Detect #DBs should set RF=1, hardware (multiple generations of both Intel and AMD), in fact does not. Through insider knowledge, extreme foresight, sheer dumb luck, or some combination thereof, KVM correctly handled RF for General Detect #DBs. Fixes: 38827dbd3fb8 ("KVM: x86: Do not update EFLAGS on faulting emulation") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20220830231614.3580124-9-seanjc@google.com Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 17bb3d0e2d13..e07607eed35c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -459,6 +459,7 @@ static int exception_class(int vector) #define EXCPT_TRAP 1 #define EXCPT_ABORT 2 #define EXCPT_INTERRUPT 3 +#define EXCPT_DB 4 static int exception_type(int vector) { @@ -469,8 +470,14 @@ static int exception_type(int vector) mask = 1 << vector; - /* #DB is trap, as instruction watchpoints are handled elsewhere */ - if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR))) + /* + * #DBs can be trap-like or fault-like, the caller must check other CPU + * state, e.g. DR6, to determine whether a #DB is a trap or fault. 
+ */ + if (mask & (1 << DB_VECTOR)) + return EXCPT_DB; + + if (mask & ((1 << BP_VECTOR) | (1 << OF_VECTOR))) return EXCPT_TRAP; if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR))) @@ -7560,6 +7567,12 @@ restart: unsigned long rflags = kvm_x86_ops.get_rflags(vcpu); toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + + /* + * Note, EXCPT_DB is assumed to be fault-like as the emulator + * only supports code breakpoints and general detect #DB, both + * of which are fault-like. + */ if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { kvm_rip_write(vcpu, ctxt->eip); @@ -8409,6 +8422,16 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit /* try to inject new event if pending */ if (vcpu->arch.exception.pending) { + /* + * Fault-class exceptions, except #DBs, set RF=1 in the RFLAGS + * value pushed on the stack. Trap-like exception and all #DBs + * leave RF as-is (KVM follows Intel's behavior in this regard; + * AMD states that code breakpoint #DBs excplitly clear RF=0). + * + * Note, most versions of Intel's SDM and AMD's APM incorrectly + * describe the behavior of General Detect #DBs, which are + * fault-like. They do _not_ set RF, a la code breakpoints. + */ if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | X86_EFLAGS_RF); From f5ad52da145a460b11d3a02a74c995fc33cce2f0 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 9 Nov 2020 21:06:59 +0000 Subject: [PATCH 0568/1477] x86/topology: Set cpu_die_id only if DIE_TYPE found [ Upstream commit cb09a379724d299c603a7a79f444f52a9a75b8d2 ] CPUID Leaf 0x1F defines a DIE_TYPE level (nb: ECX[8:15] level type == 0x5), but CPUID Leaf 0xB does not. However, detect_extended_topology() will set struct cpuinfo_x86.cpu_die_id regardless of whether a valid Die ID was found. Only set cpu_die_id if a DIE_TYPE level is found. 
CPU topology code may use another value for cpu_die_id, e.g. the AMD NodeId on AMD-based systems. Code ordering should be maintained so that the CPUID Leaf 0x1F Die ID value will take precedence on systems that may use another value. Suggested-by: Borislav Petkov Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201109210659.754018-5-Yazen.Ghannam@amd.com Stable-dep-of: 2b12a7a126d6 ("x86/topology: Fix multiple packages shown on a single-package system") Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/topology.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 91288da29599..8678864ce712 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -96,6 +96,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width; unsigned int core_select_mask, core_level_siblings; unsigned int die_select_mask, die_level_siblings; + bool die_level_present = false; int leaf; leaf = detect_extended_topology_leaf(c); @@ -126,6 +127,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); } if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) { + die_level_present = true; die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); } @@ -139,8 +141,12 @@ int detect_extended_topology(struct cpuinfo_x86 *c) c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) & core_select_mask; - c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid, - core_plus_mask_width) & die_select_mask; + + if (die_level_present) { + c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid, + core_plus_mask_width) & die_select_mask; + } + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, die_plus_mask_width); /* From 6c31fc028a657e8d53cec5127a39a35cce6dba08 Mon Sep 17 00:00:00 2001 From: Zhang Rui 
Date: Fri, 14 Oct 2022 17:01:46 +0800 Subject: [PATCH 0569/1477] x86/topology: Fix multiple packages shown on a single-package system [ Upstream commit 2b12a7a126d62bdbd81f4923c21bf6e9a7fbd069 ] CPUID.1F/B does not enumerate Package level explicitly, instead, all the APIC-ID bits above the enumerated levels are assumed to be package ID bits. Current code gets package ID by shifting out all the APIC-ID bits that Linux supports, rather than shifting out all the APIC-ID bits that CPUID.1F enumerates. This introduces problems when CPUID.1F enumerates a level that Linux does not support. For example, on a single package AlderLake-N, there are 2 Ecore Modules with 4 atom cores in each module. Linux does not support the Module level and interprets the Module ID bits as package ID and erroneously reports a multi module system as a multi-package system. Fix this by using APIC-ID bits above all the CPUID.1F enumerated levels as package ID. [ dhansen: spelling fix ] Fixes: 7745f03eb395 ("x86/topology: Add CPUID.1F multi-die/package support") Suggested-by: Len Brown Signed-off-by: Zhang Rui Signed-off-by: Dave Hansen Reviewed-by: Len Brown Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221014090147.1836-4-rui.zhang@intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/topology.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 8678864ce712..696309749d62 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -96,6 +96,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width; unsigned int core_select_mask, core_level_siblings; unsigned int die_select_mask, die_level_siblings; + unsigned int pkg_mask_width; bool die_level_present = false; int leaf; @@ -111,10 +112,10 @@ int detect_extended_topology(struct cpuinfo_x86 *c) core_level_siblings = smp_num_siblings = 
LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); sub_index = 1; - do { + while (true) { cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx); /* @@ -132,8 +133,13 @@ int detect_extended_topology(struct cpuinfo_x86 *c) die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); } + if (LEAFB_SUBTYPE(ecx) != INVALID_TYPE) + pkg_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + else + break; + sub_index++; - } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); + } core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; die_select_mask = (~(-1 << die_plus_mask_width)) >> @@ -148,7 +154,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) } c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, - die_plus_mask_width); + pkg_mask_width); /* * Reinit the apicid, now that we have extended initial_apicid. */ From 5bdbccc79c86424fef1960de76abab9e83cfbdc9 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 14 Oct 2022 17:01:47 +0800 Subject: [PATCH 0570/1477] x86/topology: Fix duplicated core ID within a package [ Upstream commit 71eac7063698b7d7b8fafb1683ac24a034541141 ] Today, core ID is assumed to be unique within each package. But an AlderLake-N platform adds a Module level between core and package, Linux excludes the unknown modules bits from the core ID, resulting in duplicate core ID's. To keep core ID unique within a package, Linux must include all APIC-ID bits for known or unknown levels above the core and below the package in the core ID. It is important to understand that core ID's have always come directly from the APIC-ID encoding, which comes from the BIOS. Thus there is no guarantee that they start at 0, or that they are contiguous. As such, naively using them for array indexes can be problematic. 
[ dhansen: un-known -> unknown ] Fixes: 7745f03eb395 ("x86/topology: Add CPUID.1F multi-die/package support") Suggested-by: Len Brown Signed-off-by: Zhang Rui Signed-off-by: Dave Hansen Reviewed-by: Len Brown Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221014090147.1836-5-rui.zhang@intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 696309749d62..37d48ab3d077 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -141,7 +141,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) sub_index++; } - core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; + core_select_mask = (~(-1 << pkg_mask_width)) >> ht_mask_width; die_select_mask = (~(-1 << die_plus_mask_width)) >> core_plus_mask_width; From 9faacf442d11c872e7e2a66a6d1844459336dfb1 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Thu, 14 Jul 2022 16:13:15 +0000 Subject: [PATCH 0571/1477] KVM: x86: Protect the unused bits in MSR exiting flags [ Upstream commit cf5029d5dd7cb0aaa53250fa9e389abd231606b3 ] The flags for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER have no protection for their unused bits. Without protection, future development for these features will be difficult. Add the protection needed to make it possible to extend these features in the future. 
Signed-off-by: Aaron Lewis Message-Id: <20220714161314.1715227-1-aaronlewis@google.com> Signed-off-by: Paolo Bonzini Stable-dep-of: 2e3272bc1790 ("KVM: x86: Copy filter arg outside kvm_vm_ioctl_set_msr_filter()") Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e07607eed35c..ed8efd402d05 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5360,6 +5360,11 @@ split_irqchip_unlock: r = 0; break; case KVM_CAP_X86_USER_SPACE_MSR: + r = -EINVAL; + if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL | + KVM_MSR_EXIT_REASON_UNKNOWN | + KVM_MSR_EXIT_REASON_FILTER)) + break; kvm->arch.user_space_msr_mask = cap->args[0]; r = 0; break; @@ -5454,6 +5459,9 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) return -EFAULT; + if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) empty &= !filter.ranges[i].nmsrs; From bb584caee8957469dfd7549dc7accdce806f23dd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 17 Oct 2022 20:45:40 +0200 Subject: [PATCH 0572/1477] KVM: x86: Copy filter arg outside kvm_vm_ioctl_set_msr_filter() [ Upstream commit 2e3272bc1790825c43d2c39690bf2836b81c6d36 ] In the next patch we want to introduce a second caller to set_msr_filter() which constructs its own filter list on the stack. Refactor the original function so it takes it as argument instead of reading it through copy_from_user(). 
Signed-off-by: Alexander Graf Message-Id: <20221017184541.2658-3-graf@amazon.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ed8efd402d05..be4326b143e1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5446,26 +5446,22 @@ err: return r; } -static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) +static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, + struct kvm_msr_filter *filter) { - struct kvm_msr_filter __user *user_msr_filter = argp; struct kvm_x86_msr_filter *new_filter, *old_filter; - struct kvm_msr_filter filter; bool default_allow; bool empty = true; int r = 0; u32 i; - if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) - return -EFAULT; - - if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY) + if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) - empty &= !filter.ranges[i].nmsrs; + for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) + empty &= !filter->ranges[i].nmsrs; - default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY); + default_allow = !(filter->flags & KVM_MSR_FILTER_DEFAULT_DENY); if (empty && !default_allow) return -EINVAL; @@ -5473,8 +5469,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) if (!new_filter) return -ENOMEM; - for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) { - r = kvm_add_msr_filter(new_filter, &filter.ranges[i]); + for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) { + r = kvm_add_msr_filter(new_filter, &filter->ranges[i]); if (r) { kvm_free_msr_filter(new_filter); return r; @@ -5803,9 +5799,16 @@ set_pit2_out: case KVM_SET_PMU_EVENT_FILTER: r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); break; - case KVM_X86_SET_MSR_FILTER: - r = kvm_vm_ioctl_set_msr_filter(kvm, argp); + case 
KVM_X86_SET_MSR_FILTER: { + struct kvm_msr_filter __user *user_msr_filter = argp; + struct kvm_msr_filter filter; + + if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) + return -EFAULT; + + r = kvm_vm_ioctl_set_msr_filter(kvm, &filter); break; + } default: r = -ENOTTY; } From 4dbb739eb29c99c8b59bbb7bbaa71ec1b438f37c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 17 Oct 2022 20:45:41 +0200 Subject: [PATCH 0573/1477] KVM: x86: Add compat handler for KVM_X86_SET_MSR_FILTER [ Upstream commit 1739c7017fb1d759965dcbab925ff5980a5318cb ] The KVM_X86_SET_MSR_FILTER ioctls contains a pointer in the passed in struct which means it has a different struct size depending on whether it gets called from 32bit or 64bit code. This patch introduces compat code that converts from the 32bit struct to its 64bit counterpart which then gets used going forward internally. With this applied, 32bit QEMU can successfully set MSR bitmaps when running on 64bit kernels. Reported-by: Andrew Randrianasulu Fixes: 1a155254ff937 ("KVM: x86: Introduce MSR filtering") Signed-off-by: Alexander Graf Message-Id: <20221017184541.2658-4-graf@amazon.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index be4326b143e1..0ac80b3ff0f5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5493,6 +5493,62 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, return 0; } +#ifdef CONFIG_KVM_COMPAT +/* for KVM_X86_SET_MSR_FILTER */ +struct kvm_msr_filter_range_compat { + __u32 flags; + __u32 nmsrs; + __u32 base; + __u32 bitmap; +}; + +struct kvm_msr_filter_compat { + __u32 flags; + struct kvm_msr_filter_range_compat ranges[KVM_MSR_FILTER_MAX_RANGES]; +}; + +#define KVM_X86_SET_MSR_FILTER_COMPAT _IOW(KVMIO, 0xc6, struct kvm_msr_filter_compat) + +long kvm_arch_vm_compat_ioctl(struct file 
*filp, unsigned int ioctl, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct kvm *kvm = filp->private_data; + long r = -ENOTTY; + + switch (ioctl) { + case KVM_X86_SET_MSR_FILTER_COMPAT: { + struct kvm_msr_filter __user *user_msr_filter = argp; + struct kvm_msr_filter_compat filter_compat; + struct kvm_msr_filter filter; + int i; + + if (copy_from_user(&filter_compat, user_msr_filter, + sizeof(filter_compat))) + return -EFAULT; + + filter.flags = filter_compat.flags; + for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) { + struct kvm_msr_filter_range_compat *cr; + + cr = &filter_compat.ranges[i]; + filter.ranges[i] = (struct kvm_msr_filter_range) { + .flags = cr->flags, + .nmsrs = cr->nmsrs, + .base = cr->base, + .bitmap = (__u8 *)(ulong)cr->bitmap, + }; + } + + r = kvm_vm_ioctl_set_msr_filter(kvm, &filter); + break; + } + } + + return r; +} +#endif + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { From 87ac93c8dd6ddbbfa2c2239a42520c8410d2fcca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Wed, 12 Oct 2022 16:15:42 +0200 Subject: [PATCH 0574/1477] RDMA/cma: Use output interface for net_dev check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eb83f502adb036cd56c27e13b9ca3b2aabfa790b ] Commit 27cfde795a96 ("RDMA/cma: Fix arguments order in net device validation") swapped the src and dst addresses in the call to validate_net_dev(). As a consequence, the test in validate_ipv4_net_dev() to see if the net_dev is the right one, is incorrect for port 1 <-> 2 communication when the ports are on the same sub-net. This is fixed by denoting the flowi4_oif as the device instead of the incoming one. The bug has not been observed using IPv6 addresses. 
Fixes: 27cfde795a96 ("RDMA/cma: Fix arguments order in net device validation") Signed-off-by: Håkon Bugge Link: https://lore.kernel.org/r/20221012141542.16925-1-haakon.bugge@oracle.com Reviewed-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index b5fa19a033c0..9ed5de38e372 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1437,7 +1437,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev, return false; memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_iif = net_dev->ifindex; + fl4.flowi4_oif = net_dev->ifindex; fl4.daddr = daddr; fl4.saddr = saddr; From 7c4260f8f188df32414a5ecad63e8b934c2aa3f0 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Tue, 18 Oct 2022 10:27:50 -0400 Subject: [PATCH 0575/1477] IB/hfi1: Correctly move list in sc_disable() [ Upstream commit 1afac08b39d85437187bb2a92d89a741b1078f55 ] Commit 13bac861952a ("IB/hfi1: Fix abba locking issue with sc_disable()") incorrectly tries to move a list from one list head to another. The result is a kernel crash. The crash is triggered when a link goes down and there are waiters for a send to complete. The following signature is seen: BUG: kernel NULL pointer dereference, address: 0000000000000030 [...] Call Trace: sc_disable+0x1ba/0x240 [hfi1] pio_freeze+0x3d/0x60 [hfi1] handle_freeze+0x27/0x1b0 [hfi1] process_one_work+0x1b0/0x380 ? process_one_work+0x380/0x380 worker_thread+0x30/0x360 ? process_one_work+0x380/0x380 kthread+0xd7/0x100 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 The fix is to use the correct call to move the list. 
Fixes: 13bac861952a ("IB/hfi1: Fix abba locking issue with sc_disable()") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/166610327042.674422.6146908799669288976.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/pio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 1cd8f80f097a..60eb3a64518f 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -955,8 +955,7 @@ void sc_disable(struct send_context *sc) spin_unlock(&sc->release_lock); write_seqlock(&sc->waitlock); - if (!list_empty(&sc->piowait)) - list_move(&sc->piowait, &wake_list); + list_splice_init(&sc->piowait, &wake_list); write_sequnlock(&sc->waitlock); while (!list_empty(&wake_list)) { struct iowait *wait; From 4813dd737dc4fa85e72761c95fc228cafad88c30 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Oct 2022 14:44:31 -0400 Subject: [PATCH 0576/1477] NFSv4: Fix a potential state reclaim deadlock [ Upstream commit 1ba04394e028ea8b45d92685cc0d6ab582cf7647 ] If the server reboots while we are engaged in a delegation return, and there is a pNFS layout with return-on-close set, then the current code can end up deadlocking in pnfs_roc() when nfs_inode_set_delegation() tries to return the old delegation. Now that delegreturn actually uses its own copy of the stateid, it should be safe to just always update the delegation stateid in place. 
Fixes: 078000d02d57 ("pNFS: We want return-on-close to complete when evicting the inode") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/delegation.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d6ac2c4f88b6..1eb6c7a142ff 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -228,8 +228,7 @@ again: * */ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, - fmode_t type, - const nfs4_stateid *stateid, + fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit) { struct nfs_delegation *delegation; @@ -239,25 +238,24 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation != NULL) { spin_lock(&delegation->lock); - if (nfs4_is_valid_delegation(delegation, 0)) { - nfs4_stateid_copy(&delegation->stateid, stateid); - delegation->type = type; - delegation->pagemod_limit = pagemod_limit; - oldcred = delegation->cred; - delegation->cred = get_cred(cred); - clear_bit(NFS_DELEGATION_NEED_RECLAIM, - &delegation->flags); - spin_unlock(&delegation->lock); - rcu_read_unlock(); - put_cred(oldcred); - trace_nfs4_reclaim_delegation(inode, type); - return; - } - /* We appear to have raced with a delegation return. 
*/ + nfs4_stateid_copy(&delegation->stateid, stateid); + delegation->type = type; + delegation->pagemod_limit = pagemod_limit; + oldcred = delegation->cred; + delegation->cred = get_cred(cred); + clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); + if (test_and_clear_bit(NFS_DELEGATION_REVOKED, + &delegation->flags)) + atomic_long_inc(&nfs_active_delegations); spin_unlock(&delegation->lock); + rcu_read_unlock(); + put_cred(oldcred); + trace_nfs4_reclaim_delegation(inode, type); + } else { + rcu_read_unlock(); + nfs_inode_set_delegation(inode, cred, type, stateid, + pagemod_limit); } - rcu_read_unlock(); - nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit); } static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) From 10c554d7227580f905ec675c32bcfa2132ed087f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Oct 2022 14:44:32 -0400 Subject: [PATCH 0577/1477] NFSv4.1: Handle RECLAIM_COMPLETE trunking errors [ Upstream commit 5d917cba3201e5c25059df96c29252fd99c4f6a7 ] If RECLAIM_COMPLETE sets the NFS4CLNT_BIND_CONN_TO_SESSION flag, then we need to loop back in order to handle it. Fixes: 0048fdd06614 ("NFSv4.1: RECLAIM_COMPLETE must handle NFS4ERR_CONN_NOT_BOUND_TO_SESSION") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/nfs4state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a8fe8f84c5ae..cd9e84ab3dd7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2642,6 +2642,7 @@ static void nfs4_state_manager(struct nfs_client *clp) if (status < 0) goto out_error; nfs4_state_end_reclaim_reboot(clp); + continue; } /* Detect expired delegations... 
*/ From f0f1c74fa670d37c042514c15c83b84c1c9fc672 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Oct 2022 14:44:33 -0400 Subject: [PATCH 0578/1477] NFSv4.1: We must always send RECLAIM_COMPLETE after a reboot [ Upstream commit e59679f2b7e522ecad99974e5636291ffd47c184 ] Currently, we are only guaranteed to send RECLAIM_COMPLETE if we have open state to recover. Fix the client to always send RECLAIM_COMPLETE after setting up the lease. Fixes: fce5c838e133 ("nfs41: RECLAIM_COMPLETE functionality") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/nfs4state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cd9e84ab3dd7..a77a3d8c0b3f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1777,6 +1777,7 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) { + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); /* Mark all delegations for reclaim */ nfs_delegation_mark_reclaim(clp); nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); From 45aea4fbf61e205649c29200726b9f45c1718a67 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Thu, 20 Oct 2022 11:20:54 +0800 Subject: [PATCH 0579/1477] nfs4: Fix kmemleak when allocate slot failed [ Upstream commit 7e8436728e22181c3f12a5dbabd35ed3a8b8c593 ] If one of the slot allocations fails, all the other allocated slots should be cleaned up; otherwise, the allocated slots will leak: unreferenced object 0xffff8881115aa100 (size 64): comm "mount.nfs", pid 679, jiffies 4294744957 (age 115.037s) hex dump (first 32 bytes): 00 cc 19 73 81 88 ff ff 00 a0 5a 11 81 88 ff ff ...s......Z..... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace: [<000000007a4c434a>] nfs4_find_or_create_slot+0x8e/0x130 [<000000005472a39c>] nfs4_realloc_slot_table+0x23f/0x270 [<00000000cd8ca0eb>] nfs40_init_client+0x4a/0x90 [<00000000128486db>] nfs4_init_client+0xce/0x270 [<000000008d2cacad>] nfs4_set_client+0x1a2/0x2b0 [<000000000e593b52>] nfs4_create_server+0x300/0x5f0 [<00000000e4425dd2>] nfs4_try_get_tree+0x65/0x110 [<00000000d3a6176f>] vfs_get_tree+0x41/0xf0 [<0000000016b5ad4c>] path_mount+0x9b3/0xdd0 [<00000000494cae71>] __x64_sys_mount+0x190/0x1d0 [<000000005d56bdec>] do_syscall_64+0x35/0x80 [<00000000687c9ae4>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: abf79bb341bf ("NFS: Add a slot table to struct nfs_client for NFSv4.0 transport blocking") Signed-off-by: Zhang Xiaoxu Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/nfs4client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 0e6437b08a3a..252c99c76a42 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -346,6 +346,7 @@ int nfs40_init_client(struct nfs_client *clp) ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE, "NFSv4.0 transport Slot table"); if (ret) { + nfs4_shutdown_slot_table(tbl); kfree(tbl); return ret; } From 37a098fc9b42bd7fce66764866aa514639667b6e Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Wed, 26 Oct 2022 10:03:21 +0800 Subject: [PATCH 0580/1477] net: dsa: Fix possible memory leaks in dsa_loop_init() [ Upstream commit 633efc8b3dc96f56f5a57f2a49764853a2fa3f50 ] kmemleak reported memory leaks in dsa_loop_init(): kmemleak: 12 new suspected memory leaks unreferenced object 0xffff8880138ce000 (size 2048): comm "modprobe", pid 390, jiffies 4295040478 (age 238.976s) backtrace: [<000000006a94f1d5>] kmalloc_trace+0x26/0x60 [<00000000a9c44622>] phy_device_create+0x5d/0x970 [<00000000d0ee2afc>] get_phy_device+0xf3/0x2b0 [<00000000dca0c71f>] __fixed_phy_register.part.0+0x92/0x4e0 [<000000008a834798>] fixed_phy_register+0x84/0xb0 [<0000000055223fcb>] 
dsa_loop_init+0xa9/0x116 [dsa_loop] ... There are two reasons for memleak in dsa_loop_init(). First, fixed_phy_register() creates and registers a phy_device: fixed_phy_register() get_phy_device() phy_device_create() # freed by phy_device_free() phy_device_register() # freed by phy_device_remove() But fixed_phy_unregister() only calls phy_device_remove(). So the memory allocated in phy_device_create() is leaked. Second, when mdio_driver_register() fails in dsa_loop_init(), it just returns and there is no cleanup for phydevs. Fix the problems by catching the error of mdio_driver_register() in dsa_loop_init(), then calling both fixed_phy_unregister() and phy_device_free() to release phydevs. Also add a function for phydevs cleanup to avoid duplication. Fixes: 98cd1552ea27 ("net: dsa: Mock-up driver") Signed-off-by: Chen Zhongjin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/dsa_loop.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index e38906ae8f23..fbeb99ab9e4d 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -376,6 +376,17 @@ static struct mdio_driver dsa_loop_drv = { #define NUM_FIXED_PHYS (DSA_LOOP_NUM_PORTS - 2) +static void dsa_loop_phydevs_unregister(void) +{ + unsigned int i; + + for (i = 0; i < NUM_FIXED_PHYS; i++) + if (!IS_ERR(phydevs[i])) { + fixed_phy_unregister(phydevs[i]); + phy_device_free(phydevs[i]); + } +} + static int __init dsa_loop_init(void) { struct fixed_phy_status status = { @@ -383,23 +394,23 @@ static int __init dsa_loop_init(void) .speed = SPEED_100, .duplex = DUPLEX_FULL, }; - unsigned int i; + unsigned int i, ret; for (i = 0; i < NUM_FIXED_PHYS; i++) phydevs[i] = fixed_phy_register(PHY_POLL, &status, NULL); - return mdio_driver_register(&dsa_loop_drv); + ret = mdio_driver_register(&dsa_loop_drv); + if (ret) + dsa_loop_phydevs_unregister(); + + return ret; } module_init(dsa_loop_init);
static void __exit dsa_loop_exit(void) { - unsigned int i; - mdio_driver_unregister(&dsa_loop_drv); - for (i = 0; i < NUM_FIXED_PHYS; i++) - if (!IS_ERR(phydevs[i])) - fixed_phy_unregister(phydevs[i]); + dsa_loop_phydevs_unregister(); } module_exit(dsa_loop_exit); From d360e875c011a005628525bf290322058927e7dc Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 25 Oct 2022 10:41:46 +0800 Subject: [PATCH 0581/1477] RDMA/core: Fix null-ptr-deref in ib_core_cleanup() [ Upstream commit 07c0d131cc0fe1f3981a42958fc52d573d303d89 ] KASAN reported a null-ptr-deref error: KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] CPU: 1 PID: 379 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:destroy_workqueue+0x2f/0x740 RSP: 0018:ffff888016137df8 EFLAGS: 00000202 ... Call Trace: ib_core_cleanup+0xa/0xa1 [ib_core] __do_sys_delete_module.constprop.0+0x34f/0x5b0 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fa1a0d221b7 ... It is because the fail of roce_gid_mgmt_init() is ignored: ib_core_init() roce_gid_mgmt_init() gid_cache_wq = alloc_ordered_workqueue # fail ... ib_core_cleanup() roce_gid_mgmt_cleanup() destroy_workqueue(gid_cache_wq) # destroy an unallocated wq Fix this by catching the fail of roce_gid_mgmt_init() in ib_core_init(). 
Fixes: 03db3a2d81e6 ("IB/core: Add RoCE GID table management") Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221025024146.109137-1-chenzhongjin@huawei.com Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/device.c | 10 +++++++++- drivers/infiniband/core/nldev.c | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index aa526c5ca0cf..d91892ffe243 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2759,10 +2759,18 @@ static int __init ib_core_init(void) nldev_init(); rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); - roce_gid_mgmt_init(); + ret = roce_gid_mgmt_init(); + if (ret) { + pr_warn("Couldn't init RoCE GID management\n"); + goto err_parent; + } return 0; +err_parent: + rdma_nl_unregister(RDMA_NL_LS); + nldev_exit(); + unregister_pernet_device(&rdma_dev_net_ops); err_compat: unregister_blocking_lsm_notifier(&ibdev_lsm_nb); err_sa: diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 12d29d54a081..c90f6378d839 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -2181,7 +2181,7 @@ void __init nldev_init(void) rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); } -void __exit nldev_exit(void) +void nldev_exit(void) { rdma_nl_unregister(RDMA_NL_NLDEV); } From 4e1e4485b2520983b1cc47b3af278e9f4b9734a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 25 Oct 2022 18:32:32 +0300 Subject: [PATCH 0582/1477] RDMA/qedr: clean up work queue on failure in qedr_alloc_resources() [ Upstream commit 7a47e077e503feb73d56e491ce89aa73b67a3972 ] Add a check for if create_singlethread_workqueue() fails and also destroy the work queue on failure paths. 
Fixes: e411e0587e0d ("RDMA/qedr: Add iWARP connection management functions") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/Y1gBkDucQhhWj5YM@kili Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 967641662b24..d0bb61b7e419 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -374,6 +374,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev) if (IS_IWARP(dev)) { xa_init(&dev->qps); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); + if (!dev->iwarp_wq) { + rc = -ENOMEM; + goto err1; + } } /* Allocate Status blocks for CNQ */ @@ -381,7 +385,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev) GFP_KERNEL); if (!dev->sb_array) { rc = -ENOMEM; - goto err1; + goto err_destroy_wq; } dev->cnq_array = kcalloc(dev->num_cnq, @@ -432,6 +436,9 @@ err3: kfree(dev->cnq_array); err2: kfree(dev->sb_array); +err_destroy_wq: + if (IS_IWARP(dev)) + destroy_workqueue(dev->iwarp_wq); err1: kfree(dev->sgid_tbl); return rc; From 31b83d6990c8e5fe8600f4553bbe8beb2b249a56 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 31 May 2021 09:35:12 +0200 Subject: [PATCH 0583/1477] nfc: fdp: drop ftrace-like debugging messages [ Upstream commit 9571289ddf71694de0e023afc5e88d90cfd067b5 ] Now that the kernel has ftrace, any debugging calls that just do "made it to this function!" and "leaving this function!" can be removed. Better to use standard debugging tools. This allows also to remove several local variables and entire fdp_nci_recv_frame() function (whose purpose was only to log). 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20210531073522.6720-1-krzysztof.kozlowski@canonical.com Signed-off-by: Jakub Kicinski Stable-dep-of: 8e4aae6b8ca7 ("nfc: fdp: Fix potential memory leak in fdp_nci_send()") Signed-off-by: Sasha Levin --- drivers/nfc/fdp/fdp.c | 31 ------------------------------- drivers/nfc/fdp/fdp.h | 1 - drivers/nfc/fdp/i2c.c | 12 +----------- 3 files changed, 1 insertion(+), 43 deletions(-) diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index 4dc7bd7e02b6..52c60d11849c 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -238,9 +238,6 @@ static int fdp_nci_open(struct nci_dev *ndev) { int r; struct fdp_nci_info *info = nci_get_drvdata(ndev); - struct device *dev = &info->phy->i2c_dev->dev; - - dev_dbg(dev, "%s\n", __func__); r = info->phy_ops->enable(info->phy); @@ -249,19 +246,12 @@ static int fdp_nci_open(struct nci_dev *ndev) static int fdp_nci_close(struct nci_dev *ndev) { - struct fdp_nci_info *info = nci_get_drvdata(ndev); - struct device *dev = &info->phy->i2c_dev->dev; - - dev_dbg(dev, "%s\n", __func__); return 0; } static int fdp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) { struct fdp_nci_info *info = nci_get_drvdata(ndev); - struct device *dev = &info->phy->i2c_dev->dev; - - dev_dbg(dev, "%s\n", __func__); if (atomic_dec_and_test(&info->data_pkt_counter)) info->data_pkt_counter_cb(ndev); @@ -269,16 +259,6 @@ static int fdp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) return info->phy_ops->write(info->phy, skb); } -int fdp_nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb) -{ - struct fdp_nci_info *info = nci_get_drvdata(ndev); - struct device *dev = &info->phy->i2c_dev->dev; - - dev_dbg(dev, "%s\n", __func__); - return nci_recv_frame(ndev, skb); -} -EXPORT_SYMBOL(fdp_nci_recv_frame); - static int fdp_nci_request_firmware(struct nci_dev *ndev) { struct fdp_nci_info *info = nci_get_drvdata(ndev); @@ -489,8 +469,6 @@ static int fdp_nci_setup(struct nci_dev 
*ndev) int r; u8 patched = 0; - dev_dbg(dev, "%s\n", __func__); - r = nci_core_init(ndev); if (r) goto error; @@ -598,9 +576,7 @@ static int fdp_nci_core_reset_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { struct fdp_nci_info *info = nci_get_drvdata(ndev); - struct device *dev = &info->phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); info->setup_reset_ntf = 1; wake_up(&info->setup_wq); @@ -611,9 +587,7 @@ static int fdp_nci_prop_patch_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { struct fdp_nci_info *info = nci_get_drvdata(ndev); - struct device *dev = &info->phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); info->setup_patch_ntf = 1; info->setup_patch_status = skb->data[0]; wake_up(&info->setup_wq); @@ -786,11 +760,6 @@ EXPORT_SYMBOL(fdp_nci_probe); void fdp_nci_remove(struct nci_dev *ndev) { - struct fdp_nci_info *info = nci_get_drvdata(ndev); - struct device *dev = &info->phy->i2c_dev->dev; - - dev_dbg(dev, "%s\n", __func__); - nci_unregister_device(ndev); nci_free_device(ndev); } diff --git a/drivers/nfc/fdp/fdp.h b/drivers/nfc/fdp/fdp.h index 9bd1f3f23e2d..ead3b21ccae6 100644 --- a/drivers/nfc/fdp/fdp.h +++ b/drivers/nfc/fdp/fdp.h @@ -25,6 +25,5 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops, struct nci_dev **ndev, int tx_headroom, int tx_tailroom, u8 clock_type, u32 clock_freq, u8 *fw_vsc_cfg); void fdp_nci_remove(struct nci_dev *ndev); -int fdp_nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); #endif /* __LOCAL_FDP_H_ */ diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index ad0abb1f0bae..5e300788be52 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -49,7 +49,6 @@ static int fdp_nci_i2c_enable(void *phy_id) { struct fdp_i2c_phy *phy = phy_id; - dev_dbg(&phy->i2c_dev->dev, "%s\n", __func__); fdp_nci_i2c_reset(phy); return 0; @@ -59,7 +58,6 @@ static void fdp_nci_i2c_disable(void *phy_id) { struct fdp_i2c_phy *phy = phy_id; - dev_dbg(&phy->i2c_dev->dev, "%s\n", __func__); 
fdp_nci_i2c_reset(phy); } @@ -197,7 +195,6 @@ flush: static irqreturn_t fdp_nci_i2c_irq_thread_fn(int irq, void *phy_id) { struct fdp_i2c_phy *phy = phy_id; - struct i2c_client *client; struct sk_buff *skb; int r; @@ -206,9 +203,6 @@ static irqreturn_t fdp_nci_i2c_irq_thread_fn(int irq, void *phy_id) return IRQ_NONE; } - client = phy->i2c_dev; - dev_dbg(&client->dev, "%s\n", __func__); - r = fdp_nci_i2c_read(phy, &skb); if (r == -EREMOTEIO) @@ -217,7 +211,7 @@ static irqreturn_t fdp_nci_i2c_irq_thread_fn(int irq, void *phy_id) return IRQ_HANDLED; if (skb != NULL) - fdp_nci_recv_frame(phy->ndev, skb); + nci_recv_frame(phy->ndev, skb); return IRQ_HANDLED; } @@ -288,8 +282,6 @@ static int fdp_nci_i2c_probe(struct i2c_client *client) u32 clock_freq; int r = 0; - dev_dbg(dev, "%s\n", __func__); - if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { nfc_err(dev, "No I2C_FUNC_I2C support\n"); return -ENODEV; @@ -351,8 +343,6 @@ static int fdp_nci_i2c_remove(struct i2c_client *client) { struct fdp_i2c_phy *phy = i2c_get_clientdata(client); - dev_dbg(&client->dev, "%s\n", __func__); - fdp_nci_remove(phy->ndev); fdp_nci_i2c_disable(phy); From e8c11ee2d07f7c4dfa2ac0ea8efc4f627e58ea57 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:29 +0800 Subject: [PATCH 0584/1477] nfc: fdp: Fix potential memory leak in fdp_nci_send() [ Upstream commit 8e4aae6b8ca76afb1fb64dcb24be44ba814e7f8a ] fdp_nci_send() will call fdp_nci_i2c_write that will not free skb in the function. As a result, when fdp_nci_i2c_write() finished, the skb will memleak. fdp_nci_send() should free skb after fdp_nci_i2c_write() finished. Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver") Signed-off-by: Shang XiaoJing Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/nfc/fdp/fdp.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index 52c60d11849c..90bea6a1db69 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -252,11 +252,19 @@ static int fdp_nci_close(struct nci_dev *ndev) static int fdp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) { struct fdp_nci_info *info = nci_get_drvdata(ndev); + int ret; if (atomic_dec_and_test(&info->data_pkt_counter)) info->data_pkt_counter_cb(ndev); - return info->phy_ops->write(info->phy, skb); + ret = info->phy_ops->write(info->phy, skb); + if (ret < 0) { + kfree_skb(skb); + return ret; + } + + consume_skb(skb); + return 0; } static int fdp_nci_request_firmware(struct nci_dev *ndev) From eecea068bf116d448c271263d879aa22926cc0d3 Mon Sep 17 00:00:00 2001 From: wengjianfeng Date: Fri, 18 Jun 2021 17:10:16 +0800 Subject: [PATCH 0585/1477] NFC: nxp-nci: remove unnecessary labels [ Upstream commit 96a19319921ceb4b2f4c49d1b9bf9de1161e30ca ] Simplify the code by removing unnecessary labels and returning directly. Signed-off-by: wengjianfeng Signed-off-by: David S. 
Miller Stable-dep-of: 7bf1ed6aff0f ("nfc: nxp-nci: Fix potential memory leak in nxp_nci_send()") Signed-off-by: Sasha Levin --- drivers/nfc/nxp-nci/core.c | 39 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index a0ce95a287c5..2b0c7232e91f 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -70,21 +70,16 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) struct nxp_nci_info *info = nci_get_drvdata(ndev); int r; - if (!info->phy_ops->write) { - r = -ENOTSUPP; - goto send_exit; - } + if (!info->phy_ops->write) + return -EOPNOTSUPP; - if (info->mode != NXP_NCI_MODE_NCI) { - r = -EINVAL; - goto send_exit; - } + if (info->mode != NXP_NCI_MODE_NCI) + return -EINVAL; r = info->phy_ops->write(info->phy_id, skb); if (r < 0) kfree_skb(skb); -send_exit: return r; } @@ -104,10 +99,8 @@ int nxp_nci_probe(void *phy_id, struct device *pdev, int r; info = devm_kzalloc(pdev, sizeof(struct nxp_nci_info), GFP_KERNEL); - if (!info) { - r = -ENOMEM; - goto probe_exit; - } + if (!info) + return -ENOMEM; info->phy_id = phy_id; info->pdev = pdev; @@ -120,31 +113,25 @@ int nxp_nci_probe(void *phy_id, struct device *pdev, if (info->phy_ops->set_mode) { r = info->phy_ops->set_mode(info->phy_id, NXP_NCI_MODE_COLD); if (r < 0) - goto probe_exit; + return r; } info->mode = NXP_NCI_MODE_COLD; info->ndev = nci_allocate_device(&nxp_nci_ops, NXP_NCI_NFC_PROTOCOLS, NXP_NCI_HDR_LEN, 0); - if (!info->ndev) { - r = -ENOMEM; - goto probe_exit; - } + if (!info->ndev) + return -ENOMEM; nci_set_parent_dev(info->ndev, pdev); nci_set_drvdata(info->ndev, info); r = nci_register_device(info->ndev); - if (r < 0) - goto probe_exit_free_nci; + if (r < 0) { + nci_free_device(info->ndev); + return r; + } *ndev = info->ndev; - - goto probe_exit; - -probe_exit_free_nci: - nci_free_device(info->ndev); -probe_exit: return r; } EXPORT_SYMBOL(nxp_nci_probe); From 
9ae2c9a91ff068f4c3e392f47e8e26a1c9f85ebb Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:30 +0800 Subject: [PATCH 0586/1477] nfc: nxp-nci: Fix potential memory leak in nxp_nci_send() [ Upstream commit 7bf1ed6aff0f70434bd0cdd45495e83f1dffb551 ] nxp_nci_send() will call nxp_nci_i2c_write(), and only free skb when nxp_nci_i2c_write() failed. However, even if the nxp_nci_i2c_write() run succeeds, the skb will not be freed in nxp_nci_i2c_write(). As the result, the skb will memleak. nxp_nci_send() should also free the skb when nxp_nci_i2c_write() succeeds. Fixes: dece45855a8b ("NFC: nxp-nci: Add support for NXP NCI chips") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/nxp-nci/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index 2b0c7232e91f..b68b315689c3 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -77,10 +77,13 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) return -EINVAL; r = info->phy_ops->write(info->phy_id, skb); - if (r < 0) + if (r < 0) { kfree_skb(skb); + return r; + } - return r; + consume_skb(skb); + return 0; } static struct nci_ops nxp_nci_ops = { From 0acfcd2aed4f7eaf086218414c6b7a197c9873f3 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:31 +0800 Subject: [PATCH 0587/1477] nfc: s3fwrn5: Fix potential memory leak in s3fwrn5_nci_send() [ Upstream commit 3a146b7e3099dc7cf3114f627d9b79291e2d2203 ] s3fwrn5_nci_send() will call s3fwrn5_i2c_write() or s3fwrn82_uart_write(), and free the skb if write() failed. However, even if the write() run succeeds, the skb will not be freed in write(). As the result, the skb will memleak. s3fwrn5_nci_send() should also free the skb when write() succeeds. 
Fixes: c04c674fadeb ("nfc: s3fwrn5: Add driver for Samsung S3FWRN5 NFC Chip") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/s3fwrn5/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c index ba6c486d6465..9b43cd3a45af 100644 --- a/drivers/nfc/s3fwrn5/core.c +++ b/drivers/nfc/s3fwrn5/core.c @@ -97,11 +97,15 @@ static int s3fwrn5_nci_send(struct nci_dev *ndev, struct sk_buff *skb) } ret = s3fwrn5_write(info, skb); - if (ret < 0) + if (ret < 0) { kfree_skb(skb); + mutex_unlock(&info->mutex); + return ret; + } + consume_skb(skb); mutex_unlock(&info->mutex); - return ret; + return 0; } static int s3fwrn5_nci_post_setup(struct nci_dev *ndev) From 52438e734c1566f5e2bcd9a065d2d65e306c0555 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 27 Oct 2022 22:03:32 +0800 Subject: [PATCH 0588/1477] nfc: nfcmrvl: Fix potential memory leak in nfcmrvl_i2c_nci_send() [ Upstream commit 93d904a734a74c54d945a9884b4962977f1176cd ] nfcmrvl_i2c_nci_send() will be called by nfcmrvl_nci_send(), and skb should be freed in nfcmrvl_i2c_nci_send(). However, nfcmrvl_nci_send() will only free skb when i2c_master_send() return >=0, which means skb will memleak when i2c_master_send() failed. Free skb no matter whether i2c_master_send() succeeds. Fixes: b5b3e23e4cac ("NFC: nfcmrvl: add i2c driver") Signed-off-by: Shang XiaoJing Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/nfc/nfcmrvl/i2c.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index f81f1cae9324..41f27e1cac20 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -151,10 +151,15 @@ static int nfcmrvl_i2c_nci_send(struct nfcmrvl_private *priv, ret = -EREMOTEIO; } else ret = 0; - kfree_skb(skb); } - return ret; + if (ret) { + kfree_skb(skb); + return ret; + } + + consume_skb(skb); + return 0; } static void nfcmrvl_i2c_nci_update_config(struct nfcmrvl_private *priv, From c85ee1c3cbc6b3e6e0a10918e49c2266b18089eb Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 28 Oct 2022 10:09:11 +0800 Subject: [PATCH 0589/1477] net: fec: fix improper use of NETDEV_TX_BUSY [ Upstream commit 06a4df5863f73af193a4ff7abf7cb04058584f06 ] The ndo_start_xmit() method must not free skb when returning NETDEV_TX_BUSY, since caller is going to requeue freed skb. Fix it by returning NETDEV_TX_OK in case of dma_map_single() fails. Fixes: 79f339125ea3 ("net: fec: Add software TSO support") Signed-off-by: Zhang Changzhong Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index e183caf38176..686bb873125c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -623,7 +623,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb, dev_kfree_skb_any(skb); if (net_ratelimit()) netdev_err(ndev, "Tx DMA memory map failed\n"); - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } bdp->cbd_datlen = cpu_to_fec16(size); @@ -685,7 +685,7 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq, dev_kfree_skb_any(skb); if (net_ratelimit()) netdev_err(ndev, "Tx DMA memory map failed\n"); - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } } From 24f9c41435a89d73846dbb06b8ba85824bcf9fea Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 29 Oct 2022 00:07:06 +0300 Subject: [PATCH 0590/1477] ata: pata_legacy: fix pdc20230_set_piomode() [ Upstream commit 171a93182eccd6e6835d2c86b40787f9f832efaa ] Clang gives a warning when compiling pata_legacy.c with 'make W=1' about the 'rt' local variable in pdc20230_set_piomode() being set but unused. Quite obviously, there is an outb() call missing to write back the updated variable. Moreover, checking the docs by Petr Soucek revealed that bitwise AND should have been done with a negated timing mask and the master/slave timing masks were swapped while updating... 
Fixes: 669a5db411d8 ("[libata] Add a bunch of PATA drivers.") Reported-by: Damien Le Moal Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/pata_legacy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index d91ba47f2fc4..4405d255e3aa 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -278,9 +278,10 @@ static void pdc20230_set_piomode(struct ata_port *ap, struct ata_device *adev) outb(inb(0x1F4) & 0x07, 0x1F4); rt = inb(0x1F3); - rt &= 0x07 << (3 * adev->devno); + rt &= ~(0x07 << (3 * !adev->devno)); if (pio) - rt |= (1 + 3 * pio) << (3 * adev->devno); + rt |= (1 + 3 * pio) << (3 * !adev->devno); + outb(rt, 0x1F3); udelay(100); outb(inb(0x1F2) | 0x01, 0x1F2); From 5960b9081baca85cc7dcb14aec1de85999ea9d36 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Oct 2022 18:05:00 +0300 Subject: [PATCH 0591/1477] net: sched: Fix use after free in red_enqueue() [ Upstream commit 8bdc2acd420c6f3dd1f1c78750ec989f02a1e2b9 ] We can't use "skb" again after passing it to qdisc_enqueue(). This is basically identical to commit 2f09707d0c97 ("sch_sfb: Also store skb len before calling child enqueue"). Fixes: d7f4f332f082 ("sch_red: update backlog as well") Signed-off-by: Dan Carpenter Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/sched/sch_red.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index f1e013e3f04a..935d90874b1b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -72,6 +72,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; + unsigned int len; int ret; q->vars.qavg = red_calc_qavg(&q->parms, @@ -126,9 +127,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; } + len = qdisc_pkt_len(skb); ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; From 3583826b443a63681deaa855048d3f2b742af47e Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sat, 29 Oct 2022 17:41:01 +0800 Subject: [PATCH 0592/1477] net: tun: fix bugs for oversize packet when napi frags enabled [ Upstream commit 363a5328f4b0517e59572118ccfb7c626d81dca9 ] Recently, we got two syzkaller problems because of oversize packet when napi frags enabled. One of the problems is because the first seg size of the iov_iter from user space is very big, it is 2147479538 which is bigger than the threshold value for bail out early in __alloc_pages(). And skb->pfmemalloc is true, __kmalloc_reserve() would use pfmemalloc reserves without __GFP_NOWARN flag. Thus we got a warning as following: ======================================================== WARNING: CPU: 1 PID: 17965 at mm/page_alloc.c:5295 __alloc_pages+0x1308/0x16c4 mm/page_alloc.c:5295 ... 
Call trace: __alloc_pages+0x1308/0x16c4 mm/page_alloc.c:5295 __alloc_pages_node include/linux/gfp.h:550 [inline] alloc_pages_node include/linux/gfp.h:564 [inline] kmalloc_large_node+0x94/0x350 mm/slub.c:4038 __kmalloc_node_track_caller+0x620/0x8e4 mm/slub.c:4545 __kmalloc_reserve.constprop.0+0x1e4/0x2b0 net/core/skbuff.c:151 pskb_expand_head+0x130/0x8b0 net/core/skbuff.c:1654 __skb_grow include/linux/skbuff.h:2779 [inline] tun_napi_alloc_frags+0x144/0x610 drivers/net/tun.c:1477 tun_get_user+0x31c/0x2010 drivers/net/tun.c:1835 tun_chr_write_iter+0x98/0x100 drivers/net/tun.c:2036 The other problem is because odd IPv6 packets without NEXTHDR_NONE extension header and have big packet length, it is 2127925 which is bigger than ETH_MAX_MTU(65535). After ipv6_gso_pull_exthdrs() in ipv6_gro_receive(), network_header offset and transport_header offset are all bigger than U16_MAX. That would trigger skb->network_header and skb->transport_header overflow error, because they are all '__u16' type. Eventually, it would affect the value for __skb_push(skb, value), and make it be a big value. After __skb_push() in ipv6_gro_receive(), skb->data would less than skb->head, an out of bounds memory bug occurred. That would trigger the problem as following: ================================================================== BUG: KASAN: use-after-free in eth_type_trans+0x100/0x260 ... Call trace: dump_backtrace+0xd8/0x130 show_stack+0x1c/0x50 dump_stack_lvl+0x64/0x7c print_address_description.constprop.0+0xbc/0x2e8 print_report+0x100/0x1e4 kasan_report+0x80/0x120 __asan_load8+0x78/0xa0 eth_type_trans+0x100/0x260 napi_gro_frags+0x164/0x550 tun_get_user+0xda4/0x1270 tun_chr_write_iter+0x74/0x130 do_iter_readv_writev+0x130/0x1ec do_iter_write+0xbc/0x1e0 vfs_writev+0x13c/0x26c To fix the problems, restrict the packet size less than (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN) which has considered reserved skb space in napi_alloc_skb() because transport_header is an offset from skb->head. 
Add len check in tun_napi_alloc_frags() simply. Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Ziyang Xuan Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221029094101.1653855-1-william.xuanziyang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/tun.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a643b2f2f4de..0c09f8e9d383 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1475,7 +1475,8 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, int err; int i; - if (it->nr_segs > MAX_SKB_FRAGS + 1) + if (it->nr_segs > MAX_SKB_FRAGS + 1 || + len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN)) return ERR_PTR(-EMSGSIZE); local_bh_disable(); From b2d7a92aff0fbd93c29d2aa6451fb99f050e2c4e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Oct 2022 09:54:45 +0200 Subject: [PATCH 0593/1477] netfilter: nf_tables: release flow rule object from commit path [ Upstream commit 26b5934ff4194e13196bedcba373cd4915071d0e ] No need to postpone this to the commit release path, since no packets are walking over this object, this is accessed from control plane only. This helped uncovered UAF triggered by races with the netlink notifier. 
Fixes: 9dd732e0bdf5 ("netfilter: nf_tables: memleak flow rule from commit path") Reported-by: syzbot+8f747f62763bc6c32916@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 810995d712ac..2143edafba77 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7527,9 +7527,6 @@ static void nft_commit_release(struct nft_trans *trans) nf_tables_chain_destroy(&trans->ctx); break; case NFT_MSG_DELRULE: - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) - nft_flow_rule_destroy(nft_trans_flow_rule(trans)); - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: @@ -7973,6 +7970,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), NFT_TRANS_COMMIT); + + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: nft_clear(net, nft_trans_set(trans)); From d69328cdb92fcc74ffaf415856fdd226a7aeff7b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 26 Oct 2022 14:32:16 +0200 Subject: [PATCH 0594/1477] ipvs: use explicitly signed chars [ Upstream commit 5c26159c97b324dc5174a5713eafb8c855cf8106 ] The `char` type with no explicit sign is sometimes signed and sometimes unsigned. This code will break on platforms such as arm, where char is unsigned. So mark it here as explicitly signed, so that the todrop_counter decrement and subsequent comparison is correct. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason A. 
Donenfeld Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_conn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index fb67f1ca2495..db13288fddfa 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1265,8 +1265,8 @@ static inline int todrop_entry(struct ip_vs_conn *cp) * The drop rate array needs tuning for real environments. * Called from timer bh only => no locking */ - static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - static char todrop_counter[9] = {0}; + static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + static signed char todrop_counter[9] = {0}; int i; /* if the conn entry hasn't lasted for 60 seconds, don't drop it. From 931f56d59c854263b32075bfac56fdb3b1598d1b Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 31 Oct 2022 20:07:04 +0800 Subject: [PATCH 0595/1477] ipvs: fix WARNING in __ip_vs_cleanup_batch() [ Upstream commit 3d00c6a0da8ddcf75213e004765e4a42acc71d5d ] During the initialization of ip_vs_conn_net_init(), if file ip_vs_conn or ip_vs_conn_sync fails to be created, the initialization is successful by default. Therefore, the ip_vs_conn or ip_vs_conn_sync file doesn't be found during the remove. 
The following is the stack information: name 'ip_vs_conn_sync' WARNING: CPU: 3 PID: 9 at fs/proc/generic.c:712 remove_proc_entry+0x389/0x460 Modules linked in: Workqueue: netns cleanup_net RIP: 0010:remove_proc_entry+0x389/0x460 Call Trace: __ip_vs_cleanup_batch+0x7d/0x120 ops_exit_list+0x125/0x170 cleanup_net+0x4ea/0xb00 process_one_work+0x9bf/0x1710 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Fixes: 61b1ab4583e2 ("IPVS: netns, add basic init per netns.") Signed-off-by: Zhengchao Shao Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_conn.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index db13288fddfa..cb6d68220c26 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1447,20 +1447,36 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) { atomic_set(&ipvs->conn_count, 0); - proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, - &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state)); - proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, - &ip_vs_conn_sync_seq_ops, - sizeof(struct ip_vs_iter_state)); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, + &ip_vs_conn_seq_ops, + sizeof(struct ip_vs_iter_state))) + goto err_conn; + + if (!proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, + &ip_vs_conn_sync_seq_ops, + sizeof(struct ip_vs_iter_state))) + goto err_conn_sync; +#endif + return 0; + +#ifdef CONFIG_PROC_FS +err_conn_sync: + remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); +err_conn: + return -ENOMEM; +#endif } void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) { /* flush all the connection entries first */ ip_vs_conn_flush(ipvs); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); remove_proc_entry("ip_vs_conn_sync", 
ipvs->net->proc_net); +#endif } int __init ip_vs_conn_init(void) From 2c8d81bdb2684d53d6cedad7410ba4cf9090e343 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 31 Oct 2022 20:07:05 +0800 Subject: [PATCH 0596/1477] ipvs: fix WARNING in ip_vs_app_net_cleanup() [ Upstream commit 5663ed63adb9619c98ab7479aa4606fa9b7a548c ] During the initialization of ip_vs_app_net_init(), if file ip_vs_app fails to be created, the initialization is successful by default. Therefore, the ip_vs_app file is not found during the removal in ip_vs_app_net_cleanup(). It will cause a WARNING. The following is the stack information: name 'ip_vs_app' WARNING: CPU: 1 PID: 9 at fs/proc/generic.c:712 remove_proc_entry+0x389/0x460 Modules linked in: Workqueue: netns cleanup_net RIP: 0010:remove_proc_entry+0x389/0x460 Call Trace: ops_exit_list+0x125/0x170 cleanup_net+0x4ea/0xb00 process_one_work+0x9bf/0x1710 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Fixes: 457c4cbc5a3d ("[NET]: Make /proc/net per network namespace") Signed-off-by: Zhengchao Shao Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_app.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index f9b16f2b2219..fdacbc3c15be 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -599,13 +599,19 @@ static const struct seq_operations ip_vs_app_seq_ops = { int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { INIT_LIST_HEAD(&ipvs->app_list); - proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops, - sizeof(struct seq_net_private)); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, + &ip_vs_app_seq_ops, + sizeof(struct seq_net_private))) + return -ENOMEM; +#endif return 0; } void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { unregister_ip_vs_app(ipvs, NULL /*
all */); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_app", ipvs->net->proc_net); +#endif } From f06186e5271b980bac03f5c97276ed0146ddc9b0 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 29 Oct 2022 00:10:49 +0800 Subject: [PATCH 0597/1477] rose: Fix NULL pointer dereference in rose_send_frame() [ Upstream commit e97c089d7a49f67027395ddf70bf327eeac2611e ] The syzkaller reported an issue: KASAN: null-ptr-deref in range [0x0000000000000380-0x0000000000000387] CPU: 0 PID: 4069 Comm: kworker/0:15 Not tainted 6.0.0-syzkaller-02734-g0326074ff465 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 Workqueue: rcu_gp srcu_invoke_callbacks RIP: 0010:rose_send_frame+0x1dd/0x2f0 net/rose/rose_link.c:101 Call Trace: rose_transmit_clear_request+0x1d5/0x290 net/rose/rose_link.c:255 rose_rx_call_request+0x4c0/0x1bc0 net/rose/af_rose.c:1009 rose_loopback_timer+0x19e/0x590 net/rose/rose_loopback.c:111 call_timer_fn+0x1a0/0x6b0 kernel/time/timer.c:1474 expire_timers kernel/time/timer.c:1519 [inline] __run_timers.part.0+0x674/0xa80 kernel/time/timer.c:1790 __run_timers kernel/time/timer.c:1768 [inline] run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1803 __do_softirq+0x1d0/0x9c8 kernel/softirq.c:571 [...] It triggers a NULL pointer dereference when 'neigh->dev->dev_addr' is accessed in rose_send_frame(). The 'neigh' first occurs in rose_loopback_timer() as 'rose_loopback_neigh', and the 'dev' in 'rose_loopback_neigh' is initialized as a NULL pointer. This had previously been fixed by commit 3b3fd068c56e3fbea30090859216a368398e39bf ("rose: Fix Null pointer dereference in rose_send_frame()"), but it was reintroduced by commit 3c53cd65dece47dd1f9d3a809f32e59d1d87b2b8 ("rose: check NULL rose_loopback_neigh->loopback"). We fix it by adding a NULL check in rose_transmit_clear_request(). When the 'dev' in 'neigh' is NULL, we don't reply to the request and just clear it.
syzkaller doesn't provide a repro, so I provide a syz repro like: r0 = syz_init_net_socket$bt_sco(0x1f, 0x5, 0x2) ioctl$sock_inet_SIOCSIFFLAGS(r0, 0x8914, &(0x7f0000000180)={'rose0\x00', 0x201}) r1 = syz_init_net_socket$rose(0xb, 0x5, 0x0) bind$rose(r1, &(0x7f00000000c0)=@full={0xb, @dev, @null, 0x0, [@null, @null, @netrom, @netrom, @default, @null]}, 0x40) connect$rose(r1, &(0x7f0000000240)=@short={0xb, @dev={0xbb, 0xbb, 0xbb, 0x1, 0x0}, @remote={0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x1}, 0x1, @netrom={0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0x0, 0x0}}, 0x1c) Fixes: 3c53cd65dece ("rose: check NULL rose_loopback_neigh->loopback") Signed-off-by: Zhang Qilong Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rose/rose_link.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index f6102e6f5161..730d2205f197 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -236,6 +236,9 @@ void rose_transmit_clear_request(struct rose_neigh *neigh, unsigned int lci, uns unsigned char *dptr; int len; + if (!neigh->dev) + return; + len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3; if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) From e77d213843e67b4373285712699b692f9c743f61 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 31 Oct 2022 20:13:40 +0800 Subject: [PATCH 0598/1477] mISDN: fix possible memory leak in mISDN_register_device() [ Upstream commit e7d1d4d9ac0dfa40be4c2c8abd0731659869b297 ] After commit 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array"), the name of device is allocated dynamically, add put_device() to give up the reference, so that the name can be freed in kobject_cleanup() when the refcount is 0. Set device class before put_device() to avoid null release() function WARN message in device_release(). Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Yang Yingliang Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- drivers/isdn/mISDN/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index a41b4b264594..7ea0100f218a 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -233,11 +233,12 @@ mISDN_register_device(struct mISDNdevice *dev, if (debug & DEBUG_CORE) printk(KERN_DEBUG "mISDN_register %s %d\n", dev_name(&dev->dev), dev->id); + dev->dev.class = &mISDN_class; + err = create_stack(dev); if (err) goto error1; - dev->dev.class = &mISDN_class; dev->dev.platform_data = dev; dev->dev.parent = parent; dev_set_drvdata(&dev->dev, dev); @@ -249,8 +250,8 @@ mISDN_register_device(struct mISDNdevice *dev, error3: delete_stack(dev); - return err; error1: + put_device(&dev->dev); return err; } From 81204283ea138a85af380d05c8735751f7dd1242 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 31 Oct 2022 20:13:41 +0800 Subject: [PATCH 0599/1477] isdn: mISDN: netjet: fix wrong check of device registration [ Upstream commit bf00f5426074249058a106a6edbb89e4b25a4d79 ] The class is set in mISDN_register_device(), but if device_add() returns an error, this leads to deleting a device that was never added. Fix this by using device_is_registered() to check whether the device is registered. Fixes: a900845e5661 ("mISDN: Add support for Traverse Technologies NETJet PCI cards") Signed-off-by: Yang Yingliang Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- drivers/isdn/hardware/mISDN/netjet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c index a52f275f8263..f8447135a902 100644 --- a/drivers/isdn/hardware/mISDN/netjet.c +++ b/drivers/isdn/hardware/mISDN/netjet.c @@ -956,7 +956,7 @@ nj_release(struct tiger_hw *card) } if (card->irq > 0) free_irq(card->irq, card); - if (card->isac.dch.dev.dev.class) + if (device_is_registered(&card->isac.dch.dev.dev)) mISDN_unregister_device(&card->isac.dch.dev); for (i = 0; i < 2; i++) { From a52e24c7fcc3c5ce3588a14e3663c00868d36623 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:37 +0000 Subject: [PATCH 0600/1477] btrfs: fix inode list leak during backref walking at resolve_indirect_refs() [ Upstream commit 5614dc3a47e3310fbc77ea3b67eaadd1c6417bf1 ] During backref walking, at resolve_indirect_refs(), if we get an error we jump to the 'out' label and call ulist_free() on the 'parents' ulist, which frees all the elements in the ulist - however that does not free any inode lists that may be attached to elements, through the 'aux' field of a ulist node, so we end up leaking lists if we have any attached to the unodes. Fix this by calling free_leaf_list() instead of ulist_free() when we exit from resolve_indirect_refs(). The static function free_leaf_list() is moved up for this to be possible and it's slightly simplified by removing unnecessary code. 
Fixes: 3301958b7c1d ("Btrfs: add inodes before dropping the extent lock in find_all_leafs") Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/backref.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 92cb16c0e5ee..70c1c15266d6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -647,6 +647,18 @@ unode_aux_to_inode_list(struct ulist_node *node) return (struct extent_inode_elem *)(uintptr_t)node->aux; } +static void free_leaf_list(struct ulist *ulist) +{ + struct ulist_node *node; + struct ulist_iterator uiter; + + ULIST_ITER_INIT(&uiter); + while ((node = ulist_next(ulist, &uiter))) + free_inode_elem_list(unode_aux_to_inode_list(node)); + + ulist_free(ulist); +} + /* * We maintain three separate rbtrees: one for direct refs, one for * indirect refs which have a key, and one for indirect refs which do not @@ -761,7 +773,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, cond_resched(); } out: - ulist_free(parents); + /* + * We may have inode lists attached to refs in the parents ulist, so we + * must free them before freeing the ulist and its refs. + */ + free_leaf_list(parents); return ret; } @@ -1413,24 +1429,6 @@ out: return ret; } -static void free_leaf_list(struct ulist *blocks) -{ - struct ulist_node *node = NULL; - struct extent_inode_elem *eie; - struct ulist_iterator uiter; - - ULIST_ITER_INIT(&uiter); - while ((node = ulist_next(blocks, &uiter))) { - if (!node->aux) - continue; - eie = unode_aux_to_inode_list(node); - free_inode_elem_list(eie); - node->aux = 0; - } - - ulist_free(blocks); -} - /* * Finds all leafs with a reference to the specified combination of bytenr and * offset. 
key_list_head will point to a list of corresponding keys (caller must From 61e06128113711df0534c404fb6bb528eb7d2332 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:38 +0000 Subject: [PATCH 0601/1477] btrfs: fix inode list leak during backref walking at find_parent_nodes() [ Upstream commit 92876eec382a0f19f33d09d2c939e9ca49038ae5 ] During backref walking, at find_parent_nodes(), if we are dealing with a data extent and we get an error while resolving the indirect backrefs, at resolve_indirect_refs(), or in the while loop that iterates over the refs in the direct refs rbtree, we end up leaking the inode lists attached to the direct refs we have in the direct refs rbtree that were not yet added to the refs ulist passed as argument to find_parent_nodes(). Since they were not yet added to the refs ulist and prelim_release() does not free the lists, on error the caller can only free the lists attached to the refs that were added to the refs ulist, all the remaining refs get their inode lists never freed, therefore leaking their memory. Fix this by having prelim_release() always free any attached inode list to each ref found in the rbtree, and have find_parent_nodes() set the ref's inode list to NULL once it transfers ownership of the inode list to a ref added to the refs ulist passed to find_parent_nodes(). 
Fixes: 86d5f9944252 ("btrfs: convert prelimary reference tracking to use rbtrees") Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/backref.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 70c1c15266d6..6942707f8b03 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -288,8 +288,10 @@ static void prelim_release(struct preftree *preftree) struct prelim_ref *ref, *next_ref; rbtree_postorder_for_each_entry_safe(ref, next_ref, - &preftree->root.rb_root, rbnode) + &preftree->root.rb_root, rbnode) { + free_inode_elem_list(ref->inode_list); free_pref(ref); + } preftree->root = RB_ROOT_CACHED; preftree->count = 0; @@ -1388,6 +1390,12 @@ again: if (ret < 0) goto out; ref->inode_list = eie; + /* + * We transferred the list ownership to the ref, + * so set to NULL to avoid a double free in case + * an error happens after this. + */ + eie = NULL; } ret = ulist_add_merge_ptr(refs, ref->parent, ref->inode_list, @@ -1413,6 +1421,14 @@ again: eie->next = ref->inode_list; } eie = NULL; + /* + * We have transferred the inode list ownership from + * this ref to the ref we added to the 'refs' ulist. + * So set this ref's inode list to NULL to avoid + * use-after-free when our caller uses it or double + * frees in case an error happens before we return. 
+ */ + ref->inode_list = NULL; } cond_resched(); } From 0a0dead4ad1a2e2a9bdf133ef45111d7c8daef84 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:39 +0000 Subject: [PATCH 0602/1477] btrfs: fix ulist leaks in error paths of qgroup self tests [ Upstream commit d37de92b38932d40e4a251e876cc388f9aee5f42 ] In the test_no_shared_qgroup() and test_multiple_refs() qgroup self tests, if we fail to add the tree ref, remove the extent item or remove the extent ref, we are returning from the test function without freeing the "old_roots" ulist that was allocated by the previous calls to btrfs_find_all_roots(). Fix that by calling ulist_free() before returning. Fixes: 442244c96332 ("btrfs: qgroup: Switch self test to extent-oriented qgroup mechanism.") Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/tests/qgroup-tests.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index ce1ca8e73c2d..c4b31dccc184 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -237,8 +237,10 @@ static int test_no_shared_qgroup(struct btrfs_root *root, ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, BTRFS_FS_TREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); @@ -273,8 +275,10 @@ static int test_no_shared_qgroup(struct btrfs_root *root, } ret = remove_extent_item(root, nodesize, nodesize); - if (ret) + if (ret) { + ulist_free(old_roots); return -EINVAL; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); @@ -338,8 +342,10 @@ static int test_multiple_refs(struct btrfs_root *root, ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, BTRFS_FS_TREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, 
fs_info, nodesize, 0, &new_roots, false); @@ -373,8 +379,10 @@ static int test_multiple_refs(struct btrfs_root *root, ret = add_tree_ref(root, nodesize, nodesize, 0, BTRFS_FIRST_FREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); @@ -414,8 +422,10 @@ static int test_multiple_refs(struct btrfs_root *root, ret = remove_extent_ref(root, nodesize, nodesize, 0, BTRFS_FIRST_FREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); From cb1c012099ef5904cd468bdb8d6fcdfdd9bcb569 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 5 Oct 2022 00:27:18 +0300 Subject: [PATCH 0603/1477] Bluetooth: L2CAP: Fix use-after-free caused by l2cap_reassemble_sdu [ Upstream commit 3aff8aaca4e36dc8b17eaa011684881a80238966 ] Fix the race condition between the following two flows that run in parallel: 1. l2cap_reassemble_sdu -> chan->ops->recv (l2cap_sock_recv_cb) -> __sock_queue_rcv_skb. 2. bt_sock_recvmsg -> skb_recv_datagram, skb_free_datagram. An SKB can be queued by the first flow and immediately dequeued and freed by the second flow, therefore the callers of l2cap_reassemble_sdu can't use the SKB after that function returns. However, some places continue accessing struct l2cap_ctrl that resides in the SKB's CB for a short time after l2cap_reassemble_sdu returns, leading to a use-after-free condition (the stack trace is below, line numbers for kernel 5.19.8). Fix it by keeping a local copy of struct l2cap_ctrl. BUG: KASAN: use-after-free in l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth Read of size 1 at addr ffff88812025f2f0 by task kworker/u17:3/43169 Workqueue: hci0 hci_rx_work [bluetooth] Call Trace: dump_stack_lvl (lib/dump_stack.c:107 (discriminator 4)) print_report.cold (mm/kasan/report.c:314 mm/kasan/report.c:429) ? 
l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth kasan_report (mm/kasan/report.c:162 mm/kasan/report.c:493) ? l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth l2cap_rx (net/bluetooth/l2cap_core.c:7236 net/bluetooth/l2cap_core.c:7271) bluetooth ret_from_fork (arch/x86/entry/entry_64.S:306) Allocated by task 43169: kasan_save_stack (mm/kasan/common.c:39) __kasan_slab_alloc (mm/kasan/common.c:45 mm/kasan/common.c:436 mm/kasan/common.c:469) kmem_cache_alloc_node (mm/slab.h:750 mm/slub.c:3243 mm/slub.c:3293) __alloc_skb (net/core/skbuff.c:414) l2cap_recv_frag (./include/net/bluetooth/bluetooth.h:425 net/bluetooth/l2cap_core.c:8329) bluetooth l2cap_recv_acldata (net/bluetooth/l2cap_core.c:8442) bluetooth hci_rx_work (net/bluetooth/hci_core.c:3642 net/bluetooth/hci_core.c:3832) bluetooth process_one_work (kernel/workqueue.c:2289) worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2437) kthread (kernel/kthread.c:376) ret_from_fork (arch/x86/entry/entry_64.S:306) Freed by task 27920: kasan_save_stack (mm/kasan/common.c:39) kasan_set_track (mm/kasan/common.c:45) kasan_set_free_info (mm/kasan/generic.c:372) ____kasan_slab_free (mm/kasan/common.c:368 mm/kasan/common.c:328) slab_free_freelist_hook (mm/slub.c:1780) kmem_cache_free (mm/slub.c:3536 mm/slub.c:3553) skb_free_datagram (./include/net/sock.h:1578 ./include/net/sock.h:1639 net/core/datagram.c:323) bt_sock_recvmsg (net/bluetooth/af_bluetooth.c:295) bluetooth l2cap_sock_recvmsg (net/bluetooth/l2cap_sock.c:1212) bluetooth sock_read_iter (net/socket.c:1087) new_sync_read (./include/linux/fs.h:2052 fs/read_write.c:401) vfs_read (fs/read_write.c:482) ksys_read (fs/read_write.c:620) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) Link: 
https://lore.kernel.org/linux-bluetooth/CAKErNvoqga1WcmoR3-0875esY6TVWFQDandbVZncSiuGPBQXLA@mail.gmail.com/T/#u Fixes: d2a7ac5d5d3a ("Bluetooth: Add the ERTM receive state machine") Fixes: 4b51dae96731 ("Bluetooth: Add streaming mode receive and incoming packet classifier") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/l2cap_core.c | 48 ++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 83dd76e9196f..0327c513191c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6874,6 +6874,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb, u8 event) { + struct l2cap_ctrl local_control; int err = 0; bool skb_in_use = false; @@ -6898,15 +6899,32 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, chan->buffer_seq = chan->expected_tx_seq; skb_in_use = true; + /* l2cap_reassemble_sdu may free skb, hence invalidate + * control, so make a copy in advance to use it after + * l2cap_reassemble_sdu returns and to avoid the race + * condition, for example: + * + * The current thread calls: + * l2cap_reassemble_sdu + * chan->ops->recv == l2cap_sock_recv_cb + * __sock_queue_rcv_skb + * Another thread calls: + * bt_sock_recvmsg + * skb_recv_datagram + * skb_free_datagram + * Then the current thread tries to access control, but + * it was freed by skb_free_datagram. 
+ */ + local_control = *control; err = l2cap_reassemble_sdu(chan, skb, control); if (err) break; - if (control->final) { + if (local_control.final) { if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) { - control->final = 0; - l2cap_retransmit_all(chan, control); + local_control.final = 0; + l2cap_retransmit_all(chan, &local_control); l2cap_ertm_send(chan); } } @@ -7286,11 +7304,27 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb) { + /* l2cap_reassemble_sdu may free skb, hence invalidate control, so store + * the txseq field in advance to use it after l2cap_reassemble_sdu + * returns and to avoid the race condition, for example: + * + * The current thread calls: + * l2cap_reassemble_sdu + * chan->ops->recv == l2cap_sock_recv_cb + * __sock_queue_rcv_skb + * Another thread calls: + * bt_sock_recvmsg + * skb_recv_datagram + * skb_free_datagram + * Then the current thread tries to access control, but it was freed by + * skb_free_datagram. 
+ */ + u16 txseq = control->txseq; + BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb, chan->rx_state); - if (l2cap_classify_txseq(chan, control->txseq) == - L2CAP_TXSEQ_EXPECTED) { + if (l2cap_classify_txseq(chan, txseq) == L2CAP_TXSEQ_EXPECTED) { l2cap_pass_to_tx(chan, control); BT_DBG("buffer_seq %d->%d", chan->buffer_seq, @@ -7313,8 +7347,8 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, } } - chan->last_acked_seq = control->txseq; - chan->expected_tx_seq = __next_seq(chan, control->txseq); + chan->last_acked_seq = txseq; + chan->expected_tx_seq = __next_seq(chan, txseq); return 0; } From d9ec6e2fbd4a565b2345d4852f586b7ae3ab41fd Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 17 Oct 2022 15:58:13 +0800 Subject: [PATCH 0604/1477] Bluetooth: L2CAP: fix use-after-free in l2cap_conn_del() [ Upstream commit 0d0e2d032811280b927650ff3c15fe5020e82533 ] When l2cap_recv_frame() is invoked to receive data, and the cid is L2CAP_CID_A2MP, if the channel does not exist, it will create a channel. However, after a channel is created, the hold operation of the channel is not performed. In this case, the value of channel reference counting is 1. As a result, after hci_error_reset() is triggered, l2cap_conn_del() invokes the close hook function of A2MP to release the channel. Then l2cap_chan_unlock(chan) will trigger UAF issue. The process is as follows: Receive data: l2cap_data_channel() a2mp_channel_create() --->channel ref is 2 l2cap_chan_put() --->channel ref is 1 Triger event: hci_error_reset() hci_dev_do_close() ... 
l2cap_disconn_cfm() l2cap_conn_del() l2cap_chan_hold() --->channel ref is 2 l2cap_chan_del() --->channel ref is 1 a2mp_chan_close_cb() --->channel ref is 0, release channel l2cap_chan_unlock() --->UAF of channel The detailed Call Trace is as follows: BUG: KASAN: use-after-free in __mutex_unlock_slowpath+0xa6/0x5e0 Read of size 8 at addr ffff8880160664b8 by task kworker/u11:1/7593 Workqueue: hci0 hci_error_reset Call Trace: dump_stack_lvl+0xcd/0x134 print_report.cold+0x2ba/0x719 kasan_report+0xb1/0x1e0 kasan_check_range+0x140/0x190 __mutex_unlock_slowpath+0xa6/0x5e0 l2cap_conn_del+0x404/0x7b0 l2cap_disconn_cfm+0x8c/0xc0 hci_conn_hash_flush+0x11f/0x260 hci_dev_close_sync+0x5f5/0x11f0 hci_dev_do_close+0x2d/0x70 hci_error_reset+0x9e/0x140 process_one_work+0x98a/0x1620 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Allocated by task 7593: kasan_save_stack+0x1e/0x40 __kasan_kmalloc+0xa9/0xd0 l2cap_chan_create+0x40/0x930 amp_mgr_create+0x96/0x990 a2mp_channel_create+0x7d/0x150 l2cap_recv_frame+0x51b8/0x9a70 l2cap_recv_acldata+0xaa3/0xc00 hci_rx_work+0x702/0x1220 process_one_work+0x98a/0x1620 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Freed by task 7593: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_set_free_info+0x20/0x30 ____kasan_slab_free+0x167/0x1c0 slab_free_freelist_hook+0x89/0x1c0 kfree+0xe2/0x580 l2cap_chan_put+0x22a/0x2d0 l2cap_conn_del+0x3fc/0x7b0 l2cap_disconn_cfm+0x8c/0xc0 hci_conn_hash_flush+0x11f/0x260 hci_dev_close_sync+0x5f5/0x11f0 hci_dev_do_close+0x2d/0x70 hci_error_reset+0x9e/0x140 process_one_work+0x98a/0x1620 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Last potentially related work creation: kasan_save_stack+0x1e/0x40 __kasan_record_aux_stack+0xbe/0xd0 call_rcu+0x99/0x740 netlink_release+0xe6a/0x1cf0 __sock_release+0xcd/0x280 sock_close+0x18/0x20 __fput+0x27c/0xa90 task_work_run+0xdd/0x1a0 exit_to_user_mode_prepare+0x23c/0x250 
syscall_exit_to_user_mode+0x19/0x50 do_syscall_64+0x42/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Second to last potentially related work creation: kasan_save_stack+0x1e/0x40 __kasan_record_aux_stack+0xbe/0xd0 call_rcu+0x99/0x740 netlink_release+0xe6a/0x1cf0 __sock_release+0xcd/0x280 sock_close+0x18/0x20 __fput+0x27c/0xa90 task_work_run+0xdd/0x1a0 exit_to_user_mode_prepare+0x23c/0x250 syscall_exit_to_user_mode+0x19/0x50 do_syscall_64+0x42/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: d0be8347c623 ("Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put") Signed-off-by: Zhengchao Shao Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/l2cap_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0327c513191c..7f37e22d9242 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7604,6 +7604,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, return; } + l2cap_chan_hold(chan); l2cap_chan_lock(chan); } else { BT_DBG("unknown cid 0x%4.4x", cid); From 634f066d02bdb22a26da7deb0c7617ab1a65fc9d Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 21:26:45 +0800 Subject: [PATCH 0605/1477] net: mdio: fix undefined behavior in bit shift for __mdiobus_register [ Upstream commit 40e4eb324c59e11fcb927aa46742d28aba6ecb8a ] Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. 
The UBSAN warning call trace looks like this: UBSAN: shift-out-of-bounds in drivers/net/phy/mdio_bus.c:586:27 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c __mdiobus_register+0x49d/0x4e0 fixed_mdio_bus_init+0xd8/0x12d do_one_initcall+0x76/0x430 kernel_init_freeable+0x3b3/0x422 kernel_init+0x24/0x1e0 ret_from_fork+0x1f/0x30 Fixes: 4fd5f812c23c ("phylib: allow incremental scanning of an mii bus") Signed-off-by: Gaosheng Cui Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221031132645.168421-1-cuigaosheng1@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/mdio_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index c1cbdac4b376..77ba6c3c7a09 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -574,7 +574,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) } for (i = 0; i < PHY_MAX_ADDR; i++) { - if ((bus->phy_mask & (1 << i)) == 0) { + if ((bus->phy_mask & BIT(i)) == 0) { struct phy_device *phydev; phydev = mdiobus_scan(bus, i); From 1c89642e7f2b7ecc9635610653f5c2f0276c0051 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 1 Nov 2022 20:15:52 +0800 Subject: [PATCH 0606/1477] net, neigh: Fix null-ptr-deref in neigh_table_clear() [ Upstream commit f8017317cb0b279b8ab98b0f3901a2e0ac880dad ] When the IPv6 module gets initialized but hits an error in the middle, the kernel panics with: KASAN: null-ptr-deref in range [0x0000000000000598-0x000000000000059f] CPU: 1 PID: 361 Comm: insmod Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:__neigh_ifdown.isra.0+0x24b/0x370 RSP: 0018:ffff888012677908 EFLAGS: 00000202 ...
Call Trace: neigh_table_clear+0x94/0x2d0 ndisc_cleanup+0x27/0x40 [ipv6] inet6_init+0x21c/0x2cb [ipv6] do_one_initcall+0xd3/0x4d0 do_init_module+0x1ae/0x670 ... Kernel panic - not syncing: Fatal exception When ipv6 initialization fails, it will try to cleanup and calls: neigh_table_clear() neigh_ifdown(tbl, NULL) pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev == NULL)) # dev_net(NULL) triggers null-ptr-deref. Fix it by passing NULL to pneigh_queue_purge() in neigh_ifdown() if dev is NULL, to make kernel not panic immediately. Fixes: 66ba215cb513 ("neigh: fix possible DoS due to net iface start/stop loop") Signed-off-by: Chen Zhongjin Reviewed-by: Eric Dumazet Reviewed-by: Denis V. Lunev Link: https://lore.kernel.org/r/20221101121552.21890-1-chenzhongjin@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/neighbour.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 434c5aab83ea..f6f580e9d282 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -373,7 +373,7 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev)); + pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL); if (skb_queue_empty_lockless(&tbl->proxy_queue)) del_timer_sync(&tbl->proxy_timer); return 0; From 5dbb47ee89762da433cd8458788d7640c85f1a07 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 2 Nov 2022 10:06:10 +0800 Subject: [PATCH 0607/1477] ipv6: fix WARNING in ip6_route_net_exit_late() [ Upstream commit 768b3c745fe5789f2430bdab02f35a9ad1148d97 ] During the initialization of ip6_route_net_init_late(), if file ipv6_route or rt6_stats fails to be created, the initialization is successful by default. 
Therefore, the ipv6_route or rt6_stats file cannot be found during removal in ip6_route_net_exit_late(), which triggers a WARNING. The following is the stack information: name 'rt6_stats' WARNING: CPU: 0 PID: 9 at fs/proc/generic.c:712 remove_proc_entry+0x389/0x460 Modules linked in: Workqueue: netns cleanup_net RIP: 0010:remove_proc_entry+0x389/0x460 PKRU: 55555554 Call Trace: ops_exit_list+0xb0/0x170 cleanup_net+0x4ea/0xb00 process_one_work+0x9bf/0x1710 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Fixes: cdb1876192db ("[NETNS][IPV6] route6 - create route6 proc files for the namespace") Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221102020610.351330-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/route.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cdf215442d37..803d1aa83140 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6405,10 +6405,16 @@ static void __net_exit ip6_route_net_exit(struct net *net) static int __net_init ip6_route_net_init_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops, - sizeof(struct ipv6_route_iter)); - proc_create_net_single("rt6_stats", 0444, net->proc_net, - rt6_stats_seq_show, NULL); + if (!proc_create_net("ipv6_route", 0, net->proc_net, + &ipv6_route_seq_ops, + sizeof(struct ipv6_route_iter))) + return -ENOMEM; + + if (!proc_create_net_single("rt6_stats", 0444, net->proc_net, + rt6_stats_seq_show, NULL)) { + remove_proc_entry("ipv6_route", net->proc_net); + return -ENOMEM; + } #endif return 0; } From 8225bdaec5b043712e613ec6237842227e066695 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Tue, 1 Feb 2022 17:47:33 +0000 Subject: [PATCH 0608/1477] drm/msm/hdmi: Remove spurious IRQF_ONESHOT flag [ Upstream commit 088604d37e23e9ec01a501d0e3630bc4f02027a0 ] Quoting
the header comments, IRQF_ONESHOT is "Used by threaded interrupts which need to keep the irq line disabled until the threaded handler has been run.". When applied to an interrupt that doesn't request a threaded irq then IRQF_ONESHOT has a lesser known (undocumented?) side effect, which is to disable the forced threading of irqs. For "normal" kernels if there is no thread_fn then IRQF_ONESHOT is a nop. In this case disabling forced threading is not appropriate because the driver calls wake_up_all() (via msm_hdmi_i2c_irq) and also directly uses the regular spinlock API for locking (in msm_hdmi_hdcp_irq() ). Neither of these APIs can be called from no-thread interrupt handlers on PREEMPT_RT systems. Fix this by removing IRQF_ONESHOT. Signed-off-by: Daniel Thompson Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220201174734.196718-3-daniel.thompson@linaro.org Signed-off-by: Dmitry Baryshkov Stable-dep-of: 152d394842bb ("drm/msm/hdmi: fix IRQ lifetime") Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/hdmi/hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 47796e12b432..28823fd94f9f 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -327,7 +327,7 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, } ret = devm_request_irq(&pdev->dev, hdmi->irq, - msm_hdmi_irq, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + msm_hdmi_irq, IRQF_TRIGGER_HIGH, "hdmi_isr", hdmi); if (ret < 0) { DRM_DEV_ERROR(dev->dev, "failed to request IRQ%u: %d\n", From 59400c9b0d0767044212ca042c00440066f0909f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 13 Sep 2022 10:53:18 +0200 Subject: [PATCH 0609/1477] drm/msm/hdmi: fix IRQ lifetime [ Upstream commit 152d394842bb564148e68b92486a87db0bf54859 ] Device-managed resources allocated post component bind must be tied to the lifetime of the aggregate DRM device or they will not necessarily be released when binding of the
aggregate device is deferred. This is specifically true for the HDMI IRQ, which will otherwise remain requested so that the next bind attempt fails when requesting the IRQ a second time. Fix this by tying the device-managed lifetime of the HDMI IRQ to the DRM device so that it is released when bind fails. Fixes: 067fef372c73 ("drm/msm/hdmi: refactor bind/init") Cc: stable@vger.kernel.org # 3.19 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Tested-by: Kuogee Hsieh Reviewed-by: Kuogee Hsieh Patchwork: https://patchwork.freedesktop.org/patch/502666/ Link: https://lore.kernel.org/r/20220913085320.8577-9-johan+linaro@kernel.org Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/hdmi/hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 28823fd94f9f..bd65dc9b8892 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -326,7 +326,7 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, goto fail; } - ret = devm_request_irq(&pdev->dev, hdmi->irq, + ret = devm_request_irq(dev->dev, hdmi->irq, msm_hdmi_irq, IRQF_TRIGGER_HIGH, "hdmi_isr", hdmi); if (ret < 0) { From a06721767cfca54f36d82cb265a35ce3f9585705 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 13 Oct 2022 11:32:48 +0200 Subject: [PATCH 0610/1477] mmc: sdhci-esdhc-imx: Propagate ESDHC_FLAG_HS400* only on 8bit bus [ Upstream commit 1ed5c3b22fc78735c539e4767832aea58db6761c ] The core issues the warning "drop HS400 support since no 8-bit bus" when one of the ESDHC_FLAG_HS400* flags is set on a non 8bit capable host. To avoid this warning set these flags only on hosts that actually can do 8bit, i.e. have bus-width = <8> set in the device tree. 
Signed-off-by: Sascha Hauer Reviewed-by: Haibo Chen Fixes: 029e2476f9e6 ("mmc: sdhci-esdhc-imx: add HS400_ES support for i.MX8QXP") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221013093248.2220802-1-s.hauer@pengutronix.de Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-esdhc-imx.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index a4bd85b200a3..be4e5cdda1fa 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1692,6 +1692,10 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) host->mmc_host_ops.execute_tuning = usdhc_execute_tuning; } + err = sdhci_esdhc_imx_probe_dt(pdev, host, imx_data); + if (err) + goto disable_ahb_clk; + if (imx_data->socdata->flags & ESDHC_FLAG_MAN_TUNING) sdhci_esdhc_ops.platform_execute_tuning = esdhc_executing_tuning; @@ -1699,13 +1703,15 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) if (imx_data->socdata->flags & ESDHC_FLAG_ERR004536) host->quirks |= SDHCI_QUIRK_BROKEN_ADMA; - if (imx_data->socdata->flags & ESDHC_FLAG_HS400) + if (host->caps & MMC_CAP_8_BIT_DATA && + imx_data->socdata->flags & ESDHC_FLAG_HS400) host->quirks2 |= SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400; if (imx_data->socdata->flags & ESDHC_FLAG_BROKEN_AUTO_CMD23) host->quirks2 |= SDHCI_QUIRK2_ACMD23_BROKEN; - if (imx_data->socdata->flags & ESDHC_FLAG_HS400_ES) { + if (host->caps & MMC_CAP_8_BIT_DATA && + imx_data->socdata->flags & ESDHC_FLAG_HS400_ES) { host->mmc->caps2 |= MMC_CAP2_HS400_ES; host->mmc_host_ops.hs400_enhanced_strobe = esdhc_hs400_enhanced_strobe; @@ -1727,13 +1733,6 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) goto disable_ahb_clk; } - if (of_id) - err = sdhci_esdhc_imx_probe_dt(pdev, host, imx_data); - else - err = sdhci_esdhc_imx_probe_nondt(pdev, host, imx_data); - if (err) - goto disable_ahb_clk; - 
sdhci_esdhc_imx_hwinit(host); err = sdhci_add_host(host); From afeae13b8a3c35b450c28c08c5e954a5a6c7df39 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 11 Mar 2021 18:14:32 +0800 Subject: [PATCH 0611/1477] mmc: sdhci-pci: Avoid comma separated statements [ Upstream commit ba8734dfbe87b9dd68c9d525c0a3a52e8da42167 ] Use semicolons. Signed-off-by: Jisheng Zhang Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20210311181432.6385cd2b@xhacker.debian Signed-off-by: Ulf Hansson Stable-dep-of: 9dc0033e4658 ("mmc: sdhci-pci-core: Disable ES for ASUS BIOS on Jasper Lake") Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-pci-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index a78b060ce847..422ea3a1817a 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -975,7 +975,7 @@ static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot) slot->host->mmc->caps2 |= MMC_CAP2_CQE; if (slot->chip->pdev->device != PCI_DEVICE_ID_INTEL_GLK_EMMC) { - slot->host->mmc->caps2 |= MMC_CAP2_HS400_ES, + slot->host->mmc->caps2 |= MMC_CAP2_HS400_ES; slot->host->mmc_host_ops.hs400_enhanced_strobe = intel_hs400_enhanced_strobe; slot->host->mmc->caps2 |= MMC_CAP2_CQE_DCMD; From e975d7aecad7df71021947e6797ca719e06598cb Mon Sep 17 00:00:00 2001 From: Patrick Thompson Date: Thu, 13 Oct 2022 17:00:17 -0400 Subject: [PATCH 0612/1477] mmc: sdhci-pci-core: Disable ES for ASUS BIOS on Jasper Lake [ Upstream commit 9dc0033e4658d6f9d9952c3c0c6be3ec25bc2985 ] Enhanced Strobe (ES) does not work correctly on the ASUS 1100 series of devices. Jasper Lake eMMCs (pci_id 8086:4dc4) are supposed to support ES. There are also two system families under the series, thus this is being scoped to the ASUS BIOS. The failing ES prevents the installer from writing to disk. Falling back to HS400 without ES fixes the issue. 
Signed-off-by: Patrick Thompson Fixes: 315e3bd7ac19 ("mmc: sdhci-pci: Add support for Intel JSL") Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221013210017.3751025-1-ptf@google.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-pci-core.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 422ea3a1817a..7eb9a62ee074 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -967,6 +967,12 @@ static bool glk_broken_cqhci(struct sdhci_pci_slot *slot) dmi_match(DMI_SYS_VENDOR, "IRBIS")); } +static bool jsl_broken_hs400es(struct sdhci_pci_slot *slot) +{ + return slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_JSL_EMMC && + dmi_match(DMI_BIOS_VENDOR, "ASUSTeK COMPUTER INC."); +} + static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot) { int ret = byt_emmc_probe_slot(slot); @@ -975,9 +981,11 @@ static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot) slot->host->mmc->caps2 |= MMC_CAP2_CQE; if (slot->chip->pdev->device != PCI_DEVICE_ID_INTEL_GLK_EMMC) { - slot->host->mmc->caps2 |= MMC_CAP2_HS400_ES; - slot->host->mmc_host_ops.hs400_enhanced_strobe = - intel_hs400_enhanced_strobe; + if (!jsl_broken_hs400es(slot)) { + slot->host->mmc->caps2 |= MMC_CAP2_HS400_ES; + slot->host->mmc_host_ops.hs400_enhanced_strobe = + intel_hs400_enhanced_strobe; + } slot->host->mmc->caps2 |= MMC_CAP2_CQE_DCMD; } From f8c86d7829525e1b0ea4ee232b967661cf7de4cb Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 13 Jan 2022 16:35:53 +0100 Subject: [PATCH 0613/1477] video/fbdev/stifb: Implement the stifb_fillrect() function [ Upstream commit 9c379c65241707e44072139d782bc2dfec9b4ab3 ] The stifb driver (for Artist/HCRX graphics on PA-RISC) was missing the fillrect function. Tested on a 715/64 PA-RISC machine and in qemu. 
Signed-off-by: Helge Deller Stable-dep-of: 776d875fd4cb ("fbdev: stifb: Fall back to cfb_fillrect() on 32-bit HCRX cards") Signed-off-by: Sasha Levin --- drivers/video/fbdev/stifb.c | 45 +++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index b0470f4f595e..7753e586e65a 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -1041,6 +1041,47 @@ stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area) SETUP_FB(fb); } +#define ARTIST_VRAM_SIZE 0x000804 +#define ARTIST_VRAM_SRC 0x000808 +#define ARTIST_VRAM_SIZE_TRIGGER_WINFILL 0x000a04 +#define ARTIST_VRAM_DEST_TRIGGER_BLOCKMOVE 0x000b00 +#define ARTIST_SRC_BM_ACCESS 0x018008 +#define ARTIST_FGCOLOR 0x018010 +#define ARTIST_BGCOLOR 0x018014 +#define ARTIST_BITMAP_OP 0x01801c + +static void +stifb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) +{ + struct stifb_info *fb = container_of(info, struct stifb_info, info); + + if (rect->rop != ROP_COPY) + return cfb_fillrect(info, rect); + + SETUP_HW(fb); + + if (fb->info.var.bits_per_pixel == 32) { + WRITE_WORD(0xBBA0A000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff); + } else { + WRITE_WORD(fb->id == S9000_ID_HCRX ? 
0x13a02000 : 0x13a01000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff); + } + + WRITE_WORD(0x03000300, fb, ARTIST_BITMAP_OP); + WRITE_WORD(0x2ea01000, fb, ARTIST_SRC_BM_ACCESS); + NGLE_QUICK_SET_DST_BM_ACCESS(fb, 0x2ea01000); + NGLE_REALLY_SET_IMAGE_FG_COLOR(fb, rect->color); + WRITE_WORD(0, fb, ARTIST_BGCOLOR); + + NGLE_SET_DSTXY(fb, (rect->dx << 16) | (rect->dy)); + SET_LENXY_START_RECFILL(fb, (rect->width << 16) | (rect->height)); + + SETUP_FB(fb); +} + static void __init stifb_init_display(struct stifb_info *fb) { @@ -1105,7 +1146,7 @@ static const struct fb_ops stifb_ops = { .owner = THIS_MODULE, .fb_setcolreg = stifb_setcolreg, .fb_blank = stifb_blank, - .fb_fillrect = cfb_fillrect, + .fb_fillrect = stifb_fillrect, .fb_copyarea = stifb_copyarea, .fb_imageblit = cfb_imageblit, }; @@ -1297,7 +1338,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) goto out_err0; } info->screen_size = fix->smem_len; - info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA; + info->flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT; info->pseudo_palette = &fb->pseudo_palette; /* This has to be done !!! */ From ec54104febdcf4342cac8d718f16202484c6bf2b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 14 Oct 2022 20:01:17 +0200 Subject: [PATCH 0614/1477] fbdev: stifb: Fall back to cfb_fillrect() on 32-bit HCRX cards [ Upstream commit 776d875fd4cbb3884860ea7f63c3958f02b0c80e ] When the text console is scrolling text upwards it calls the fillrect() function to empty the new line. The current implementation doesn't seem to work correctly on HCRX cards in 32-bit mode and leave garbage in that line instead. Fix it by falling back to standard cfb_fillrect() in that case. 
Signed-off-by: Helge Deller Cc: Signed-off-by: Sasha Levin --- drivers/video/fbdev/stifb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 7753e586e65a..3feb6e40d56d 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -1055,7 +1055,8 @@ stifb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) { struct stifb_info *fb = container_of(info, struct stifb_info, info); - if (rect->rop != ROP_COPY) + if (rect->rop != ROP_COPY || + (fb->id == S9000_ID_HCRX && fb->info.var.bits_per_pixel == 32)) return cfb_fillrect(info, rect); SETUP_HW(fb); From 85e458369c0f48d3b27592c0804dedbeb466c4db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 17 Mar 2022 12:43:16 +0100 Subject: [PATCH 0615/1477] mtd: parsers: bcm47xxpart: print correct offset on read error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4c38eded807043f40f4dc49da6df097f9dcac393 ] mtd_read() gets called with offset + 0x8000 as argument so use the same value in pr_err(). 
Signed-off-by: Rafał Miłecki Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220317114316.29827-1-zajec5@gmail.com Stable-dep-of: 05e258c6ec66 ("mtd: parsers: bcm47xxpart: Fix halfblock reads") Signed-off-by: Sasha Levin --- drivers/mtd/parsers/bcm47xxpart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/parsers/bcm47xxpart.c b/drivers/mtd/parsers/bcm47xxpart.c index 6012a10f10c8..50fcf4c2174b 100644 --- a/drivers/mtd/parsers/bcm47xxpart.c +++ b/drivers/mtd/parsers/bcm47xxpart.c @@ -237,7 +237,7 @@ static int bcm47xxpart_parse(struct mtd_info *master, (uint8_t *)buf); if (err && !mtd_is_bitflip(err)) { pr_err("mtd_read error while parsing (offset: 0x%X): %d\n", - offset, err); + offset + 0x8000, err); continue; } From 37bb57908dd38d37599bca3e16d13785e81a7900 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 18 Oct 2022 11:11:29 +0200 Subject: [PATCH 0616/1477] mtd: parsers: bcm47xxpart: Fix halfblock reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 05e258c6ec669d6d18c494ea03d35962d6f5b545 ] There is some code in the parser that tries to read 0x8000 bytes into a block to "read in the middle" of the block. Well that only works if the block is also 0x10000 bytes all the time, else we get these parse errors as we reach the end of the flash: spi-nor spi0.0: mx25l1606e (2048 Kbytes) mtd_read error while parsing (offset: 0x200000): -22 mtd_read error while parsing (offset: 0x201000): -22 (...) Fix the code to do what I think was intended. 
Cc: stable@vger.kernel.org Fixes: f0501e81fbaa ("mtd: bcm47xxpart: alternative MAGIC for board_data partition") Cc: Rafał Miłecki Cc: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221018091129.280026-1-linus.walleij@linaro.org Signed-off-by: Sasha Levin --- drivers/mtd/parsers/bcm47xxpart.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/parsers/bcm47xxpart.c b/drivers/mtd/parsers/bcm47xxpart.c index 50fcf4c2174b..13daf9bffd08 100644 --- a/drivers/mtd/parsers/bcm47xxpart.c +++ b/drivers/mtd/parsers/bcm47xxpart.c @@ -233,11 +233,11 @@ static int bcm47xxpart_parse(struct mtd_info *master, } /* Read middle of the block */ - err = mtd_read(master, offset + 0x8000, 0x4, &bytes_read, + err = mtd_read(master, offset + (blocksize / 2), 0x4, &bytes_read, (uint8_t *)buf); if (err && !mtd_is_bitflip(err)) { pr_err("mtd_read error while parsing (offset: 0x%X): %d\n", - offset + 0x8000, err); + offset + (blocksize / 2), err); continue; } From efc6420d65ae90035dd4ad5322b2eef7acc5d516 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 24 Oct 2022 17:27:19 +0300 Subject: [PATCH 0617/1477] xhci-pci: Set runtime PM as default policy on all xHC 1.2 or later devices [ Upstream commit a611bf473d1f77b70f7188b5577542cb39b4701b ] For optimal power consumption of USB4 routers the XHCI PCIe endpoint used for tunneling must be in D3. Historically this is accomplished by a long list of PCIe IDs that correspond to these endpoints because the xhci_hcd driver will not default to allowing runtime PM for all devices. As both AMD and Intel have released new products with new XHCI controllers this list continues to grow. In reviewing the XHCI specification v1.2 on page 607 there is already a requirement that the PCI power management states D3hot and D3cold must be supported. In the quirk list, use this to indicate that runtime PM should be allowed on XHCI controllers. 
The following controllers are known to be xHC 1.2 and dropped explicitly: * AMD Yellow Carp * Intel Alder Lake * Intel Meteor Lake * Intel Raptor Lake [keep PCI ID for Alder Lake PCH for recently added quirk -Mathias] Cc: stable@vger.kernel.org Suggested-by: Mathias Nyman Link: https://www.intel.com/content/dam/www/public/us/en/documents/technical-specifications/extensible-host-controler-interface-usb-xhci.pdf Signed-off-by: Mario Limonciello Reviewed-by: Mika Westerberg Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20221024142720.4122053-4-mathias.nyman@intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-pci.c | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 64d5a593682b..0ee11a937011 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -58,24 +58,12 @@ #define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138 -#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e -#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI 0x464e -#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed -#define PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI 0xa71e -#define PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI 0x7ec0 +#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba #define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb #define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc -#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_1 0x161a -#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_2 0x161b -#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_3 0x161d -#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 0x161e -#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 0x15d6 -#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 0x15d7 -#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 0x161c 
-#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8 0x161f #define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 @@ -268,12 +256,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (pdev->vendor == PCI_VENDOR_ID_ETRON && @@ -342,15 +325,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4)) xhci->quirks |= XHCI_NO_SOFT_RETRY; - if (pdev->vendor == PCI_VENDOR_ID_AMD && - (pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_1 || - pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_2 || - pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_3 || - pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 || - pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 || - pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 || - pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 || - pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8)) + /* xHC spec requires PCI devices to support D3hot and D3cold */ + if (xhci->hci_version >= 0x120) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (xhci->quirks & XHCI_RESET_ON_RESUME) From 6b24d9c2acdac63cb161f677a86a4e380523cf46 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Fri, 16 Sep 2022 15:01:36 +0200 Subject: [PATCH 0618/1477] s390/boot: add secure boot trailer [ Upstream commit 
aa127a069ef312aca02b730d5137e1778d0c3ba7 ] This patch enhances the kernel image adding a trailer as required for secure boot by future firmware versions. Cc: # 5.2+ Signed-off-by: Peter Oberparleiter Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/boot/compressed/vmlinux.lds.S | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 9427e2cd0c15..11bf3919610e 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -91,8 +91,17 @@ SECTIONS _compressed_start = .; *(.vmlinux.bin.compressed) _compressed_end = .; - FILL(0xff); - . = ALIGN(4096); + } + +#define SB_TRAILER_SIZE 32 + /* Trailer needed for Secure Boot */ + . += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */ + . = ALIGN(4096) - SB_TRAILER_SIZE; + .sb.trailer : { + QUAD(0) + QUAD(0) + QUAD(0) + QUAD(0x000000207a49504c) } _end = .; From 3144ce557440b72724dfbe4075881934c900873f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 13 Aug 2022 00:44:14 +0200 Subject: [PATCH 0619/1477] media: rkisp1: Initialize color space on resizer sink and source pads [ Upstream commit 83b9296e399367862845d3b19984444fc756bd61 ] Initialize the four color space fields on the sink and source video pads of the resizer in the .init_cfg() operation. The resizer can't perform any color space conversion, so set the sink and source color spaces to the same defaults, which match the ISP source video pad default. 
Signed-off-by: Laurent Pinchart Reviewed-by: Paul Elder Reviewed-by: Dafna Hirschfeld Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/staging/media/rkisp1/rkisp1-resizer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/staging/media/rkisp1/rkisp1-resizer.c b/drivers/staging/media/rkisp1/rkisp1-resizer.c index 4dcc342ac2b2..76f17dd7670f 100644 --- a/drivers/staging/media/rkisp1/rkisp1-resizer.c +++ b/drivers/staging/media/rkisp1/rkisp1-resizer.c @@ -500,6 +500,10 @@ static int rkisp1_rsz_init_config(struct v4l2_subdev *sd, sink_fmt->height = RKISP1_DEFAULT_HEIGHT; sink_fmt->field = V4L2_FIELD_NONE; sink_fmt->code = RKISP1_DEF_FMT; + sink_fmt->colorspace = V4L2_COLORSPACE_SRGB; + sink_fmt->xfer_func = V4L2_XFER_FUNC_SRGB; + sink_fmt->ycbcr_enc = V4L2_YCBCR_ENC_601; + sink_fmt->quantization = V4L2_QUANTIZATION_LIM_RANGE; sink_crop = v4l2_subdev_get_try_crop(sd, cfg, RKISP1_RSZ_PAD_SINK); sink_crop->width = RKISP1_DEFAULT_WIDTH; From c46759e3703b9d0472e34ebf3e8d3fdde207d09c Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 23 Aug 2022 17:11:36 +0200 Subject: [PATCH 0620/1477] media: rkisp1: Zero v4l2_subdev_format fields in when validating links [ Upstream commit c53e3a049f35978a150526671587fd46b1ae7ca1 ] The local sd_fmt variable in rkisp1_capture_link_validate() has uninitialized fields, which causes random failures when calling the subdev .get_fmt() operation. Fix it by initializing the variable when declaring it, which zeros all other fields. 
Signed-off-by: Laurent Pinchart Reviewed-by: Paul Elder Reviewed-by: Dafna Hirschfeld Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/staging/media/rkisp1/rkisp1-capture.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/staging/media/rkisp1/rkisp1-capture.c b/drivers/staging/media/rkisp1/rkisp1-capture.c index 0c934ca5adaa..8936f5a81680 100644 --- a/drivers/staging/media/rkisp1/rkisp1-capture.c +++ b/drivers/staging/media/rkisp1/rkisp1-capture.c @@ -1258,11 +1258,12 @@ static int rkisp1_capture_link_validate(struct media_link *link) struct rkisp1_capture *cap = video_get_drvdata(vdev); const struct rkisp1_capture_fmt_cfg *fmt = rkisp1_find_fmt_cfg(cap, cap->pix.fmt.pixelformat); - struct v4l2_subdev_format sd_fmt; + struct v4l2_subdev_format sd_fmt = { + .which = V4L2_SUBDEV_FORMAT_ACTIVE, + .pad = link->source->index, + }; int ret; - sd_fmt.which = V4L2_SUBDEV_FORMAT_ACTIVE; - sd_fmt.pad = link->source->index; ret = v4l2_subdev_call(sd, pad, get_fmt, NULL, &sd_fmt); if (ret) return ret; From 1609231f86760c1f6a429de7913dd795b9faa08c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 24 Aug 2022 09:02:42 +0200 Subject: [PATCH 0621/1477] media: s5p_cec: limit msg.len to CEC_MAX_MSG_SIZE [ Upstream commit 93f65ce036863893c164ca410938e0968964b26c ] I expect that the hardware will have limited this to 16, but just in case it hasn't, check for this corner case. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/cec/platform/s5p/s5p_cec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/cec/platform/s5p/s5p_cec.c b/drivers/media/cec/platform/s5p/s5p_cec.c index 028a09a7531e..102f1af01000 100644 --- a/drivers/media/cec/platform/s5p/s5p_cec.c +++ b/drivers/media/cec/platform/s5p/s5p_cec.c @@ -115,6 +115,8 @@ static irqreturn_t s5p_cec_irq_handler(int irq, void *priv) dev_dbg(cec->dev, "Buffer overrun (worker did not process previous message)\n"); cec->rx = STATE_BUSY; cec->msg.len = status >> 24; + if (cec->msg.len > CEC_MAX_MSG_SIZE) + cec->msg.len = CEC_MAX_MSG_SIZE; cec->msg.rx_status = CEC_RX_STATUS_OK; s5p_cec_get_rx_buf(cec, cec->msg.len, cec->msg.msg); From 7fdc58d8c213b7adccb6daa9e9cf14e1fadff269 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 24 Aug 2022 09:06:19 +0200 Subject: [PATCH 0622/1477] media: cros-ec-cec: limit msg.len to CEC_MAX_MSG_SIZE [ Upstream commit 2dc73b48665411a08c4e5f0f823dea8510761603 ] I expect that the hardware will have limited this to 16, but just in case it hasn't, check for this corner case. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/cec/platform/cros-ec/cros-ec-cec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c index 2d95e16cd248..f66699d5dc66 100644 --- a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c +++ b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c @@ -44,6 +44,8 @@ static void handle_cec_message(struct cros_ec_cec *cros_ec_cec) uint8_t *cec_message = cros_ec->event_data.data.cec_message; unsigned int len = cros_ec->event_size; + if (len > CEC_MAX_MSG_SIZE) + len = CEC_MAX_MSG_SIZE; cros_ec_cec->rx_msg.len = len; memcpy(cros_ec_cec->rx_msg.msg, cec_message, len); From c5fd54a65c35fba67f9426e8ab92b0f0e96b616c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 30 Aug 2022 07:59:24 +0200 Subject: [PATCH 0623/1477] media: dvb-frontends/drxk: initialize err to 0 [ Upstream commit 20694e96ca089ce6693c2348f8f628ee621e4e74 ] Fix a compiler warning: drivers/media/dvb-frontends/drxk_hard.c: In function 'drxk_read_ucblocks': drivers/media/dvb-frontends/drxk_hard.c:6673:21: warning: 'err' may be used uninitialized [-Wmaybe-uninitialized] 6673 | *ucblocks = (u32) err; | ^~~~~~~~~ drivers/media/dvb-frontends/drxk_hard.c:6663:13: note: 'err' was declared here 6663 | u16 err; | ^~~ Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/dvb-frontends/drxk_hard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/drxk_hard.c b/drivers/media/dvb-frontends/drxk_hard.c index a57470bf71bf..2134e25096aa 100644 --- a/drivers/media/dvb-frontends/drxk_hard.c +++ b/drivers/media/dvb-frontends/drxk_hard.c @@ -6672,7 +6672,7 @@ static int drxk_read_snr(struct dvb_frontend *fe, u16 *snr) static int drxk_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) { struct drxk_state *state = 
fe->demodulator_priv; - u16 err; + u16 err = 0; dprintk(1, "\n"); From be6e22f54623d8a856a4f167b25be73c2ff1ff80 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Tue, 6 Sep 2022 09:46:30 +0200 Subject: [PATCH 0624/1477] media: meson: vdec: fix possible refcount leak in vdec_probe() [ Upstream commit 7718999356234d9cc6a11b4641bb773928f1390f ] v4l2_device_unregister need to be called to put the refcount got by v4l2_device_register when vdec_probe fails or vdec_remove is called. Signed-off-by: Hangyu Hua Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/staging/media/meson/vdec/vdec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/media/meson/vdec/vdec.c b/drivers/staging/media/meson/vdec/vdec.c index 5ccb3846c879..7a818ca15b37 100644 --- a/drivers/staging/media/meson/vdec/vdec.c +++ b/drivers/staging/media/meson/vdec/vdec.c @@ -1109,6 +1109,7 @@ static int vdec_probe(struct platform_device *pdev) err_vdev_release: video_device_release(vdev); + v4l2_device_unregister(&core->v4l2_dev); return ret; } @@ -1117,6 +1118,7 @@ static int vdec_remove(struct platform_device *pdev) struct amvdec_core *core = platform_get_drvdata(pdev); video_unregister_device(core->vdev_dec); + v4l2_device_unregister(&core->v4l2_dev); return 0; } From 9edf20e5a1d805855e78f241cf221d741b50d482 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Wed, 5 Oct 2022 16:32:53 +0000 Subject: [PATCH 0625/1477] ACPI: APEI: Fix integer overflow in ghes_estatus_pool_init() [ Upstream commit 43d2748394c3feb86c0c771466f5847e274fc043 ] Change num_ghes from int to unsigned int, preventing an overflow that causes the subsequent vmalloc() to fail.
The overflow happens in ghes_estatus_pool_init() when calculating len during execution of the statement below as both multiplication operands here are signed int: len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE); The following call trace is observed because of this bug: [ 9.317108] swapper/0: vmalloc error: size 18446744071562596352, exceeds total pages, mode:0xcc0(GFP_KERNEL), nodemask=(null),cpuset=/,mems_allowed=0-1 [ 9.317131] Call Trace: [ 9.317134] [ 9.317137] dump_stack_lvl+0x49/0x5f [ 9.317145] dump_stack+0x10/0x12 [ 9.317146] warn_alloc.cold+0x7b/0xdf [ 9.317150] ? __device_attach+0x16a/0x1b0 [ 9.317155] __vmalloc_node_range+0x702/0x740 [ 9.317160] ? device_add+0x17f/0x920 [ 9.317164] ? dev_set_name+0x53/0x70 [ 9.317166] ? platform_device_add+0xf9/0x240 [ 9.317168] __vmalloc_node+0x49/0x50 [ 9.317170] ? ghes_estatus_pool_init+0x43/0xa0 [ 9.317176] vmalloc+0x21/0x30 [ 9.317177] ghes_estatus_pool_init+0x43/0xa0 [ 9.317179] acpi_hest_init+0x129/0x19c [ 9.317185] acpi_init+0x434/0x4a4 [ 9.317188] ? acpi_sleep_proc_init+0x2a/0x2a [ 9.317190] do_one_initcall+0x48/0x200 [ 9.317195] kernel_init_freeable+0x221/0x284 [ 9.317200] ? rest_init+0xe0/0xe0 [ 9.317204] kernel_init+0x1a/0x130 [ 9.317205] ret_from_fork+0x22/0x30 [ 9.317208] Signed-off-by: Ashish Kalra [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/apei/ghes.c | 2 +- include/acpi/ghes.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 5206fd3b7867..9bdb5bd5fda6 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -163,7 +163,7 @@ static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx) clear_fixmap(fixmap_idx); } -int ghes_estatus_pool_init(int num_ghes) +int ghes_estatus_pool_init(unsigned int num_ghes) { unsigned long addr, len; int rc; diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 34fb3431a8f3..292a5c40bd0c 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -71,7 +71,7 @@ int ghes_register_vendor_record_notifier(struct notifier_block *nb); void ghes_unregister_vendor_record_notifier(struct notifier_block *nb); #endif -int ghes_estatus_pool_init(int num_ghes); +int ghes_estatus_pool_init(unsigned int num_ghes); /* From drivers/edac/ghes_edac.c */ From ff06067b7086c0929360fc6101d2a59544ea18a8 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Fri, 23 Sep 2022 18:02:42 -0600 Subject: [PATCH 0626/1477] scsi: core: Restrict legal sdev_state transitions via sysfs [ Upstream commit 2331ce6126be8864b39490e705286b66e2344aac ] Userspace can currently write to sysfs to transition sdev_state to RUNNING or OFFLINE from any source state. This causes issues because proper transitioning out of some states involves steps besides just changing sdev_state, so allowing userspace to change sdev_state regardless of the source state can result in inconsistencies; e.g. with ISCSI we can end up with sdev_state == SDEV_RUNNING while the device queue is quiesced. Any task attempting I/O on the device will then hang, and in more recent kernels, iscsid will hang as well. 
More detail about this bug is provided in my first attempt: https://groups.google.com/g/open-iscsi/c/PNKca4HgPDs/m/CXaDkntOAQAJ Link: https://lore.kernel.org/r/20220924000241.2967323-1-ushankar@purestorage.com Signed-off-by: Uday Shankar Suggested-by: Mike Christie Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_sysfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 42db9c52208e..6cc4d0792e3d 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -815,6 +815,14 @@ store_state_field(struct device *dev, struct device_attribute *attr, } mutex_lock(&sdev->state_mutex); + switch (sdev->sdev_state) { + case SDEV_RUNNING: + case SDEV_OFFLINE: + break; + default: + mutex_unlock(&sdev->state_mutex); + return -EINVAL; + } if (sdev->sdev_state == SDEV_RUNNING && state == SDEV_RUNNING) { ret = 0; } else { From 496eb203d046700805bd2b6644898c304ab3b970 Mon Sep 17 00:00:00 2001 From: Samuel Bailey Date: Wed, 5 Oct 2022 19:51:23 +0100 Subject: [PATCH 0627/1477] HID: saitek: add madcatz variant of MMO7 mouse device ID [ Upstream commit 79425b297f56bd481c6e97700a9a4e44c7bcfa35 ] The MadCatz variant of the MMO7 mouse has the ID 0738:1713 and the same quirks as the Saitek variant. 
Signed-off-by: Samuel Bailey Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + drivers/hid/hid-saitek.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index bb096dfb7b36..3350a41d7dce 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -827,6 +827,7 @@ #define USB_DEVICE_ID_MADCATZ_BEATPAD 0x4540 #define USB_DEVICE_ID_MADCATZ_RAT5 0x1705 #define USB_DEVICE_ID_MADCATZ_RAT9 0x1709 +#define USB_DEVICE_ID_MADCATZ_MMO7 0x1713 #define USB_VENDOR_ID_MCC 0x09db #define USB_DEVICE_ID_MCC_PMD1024LS 0x0076 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 2ab71d717bb0..4a8014e9a511 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -609,6 +609,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_MMO7) }, { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT5) }, { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT9) }, + { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_MMO7) }, #endif #if IS_ENABLED(CONFIG_HID_SAMSUNG) { HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) }, diff --git a/drivers/hid/hid-saitek.c b/drivers/hid/hid-saitek.c index c7bf14c01960..b84e975977c4 100644 --- a/drivers/hid/hid-saitek.c +++ b/drivers/hid/hid-saitek.c @@ -187,6 +187,8 @@ static const struct hid_device_id saitek_devices[] = { .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_MMO7), .driver_data = SAITEK_RELEASE_MODE_MMO7 }, + { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_MMO7), + .driver_data = SAITEK_RELEASE_MODE_MMO7 }, { } }; From 5bf8c7798b1c165da70cafd6af3900eb14a39cdd Mon Sep 17 00:00:00 2001 From: Danijel Slivka Date: Tue, 4 Oct 2022 15:39:44 +0200 Subject: [PATCH 0628/1477] drm/amdgpu: set vm_update_mode=0 as 
default for Sienna Cichlid in SRIOV case [ Upstream commit 65f8682b9aaae20c2cdee993e6fe52374ad513c9 ] For asic with VF MMIO access protection avoid using CPU for VM table updates. CPU pagetable updates have issues with HDP flush as VF MMIO access protection blocks write to mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL register during sriov runtime. v3: introduce virtualization capability flag AMDGPU_VF_MMIO_ACCESS_PROTECT which indicates that VF MMIO write access is not allowed in sriov runtime Signed-off-by: Danijel Slivka Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 16bfb36c27e4..d6f295103595 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -670,6 +670,12 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } + if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) + /* VF MMIO access (except mailbox range) from CPU + * will be blocked during sriov runtime + */ + adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; + /* we have the ability to check now */ if (amdgpu_sriov_vf(adev)) { switch (adev->asic_type) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 77b9d37bfa1b..aea49bad914f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -31,6 +31,7 @@ #define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */ #define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */ #define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */ +#define 
AMDGPU_VF_MMIO_ACCESS_PROTECT (1 << 5) /* MMIO write access is not allowed in sriov runtime */ /* all asic after AI use this offset */ #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5 @@ -241,6 +242,9 @@ struct amdgpu_virt { #define amdgpu_passthrough(adev) \ ((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE) +#define amdgpu_sriov_vf_mmio_access_protection(adev) \ +((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT) + static inline bool is_virtual_machine(void) { #ifdef CONFIG_X86 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 635601d8b131..45b1f00c5968 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3200,7 +3200,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) */ #ifdef CONFIG_X86_64 if (amdgpu_vm_update_mode == -1) { - if (amdgpu_gmc_vram_full_visible(&adev->gmc)) + /* For asic with VF MMIO access protection + * avoid using CPU for VM table updates + */ + if (amdgpu_gmc_vram_full_visible(&adev->gmc) && + !amdgpu_sriov_vf_mmio_access_protection(adev)) adev->vm_manager.vm_update_mode = AMDGPU_VM_USE_CPU_FOR_COMPUTE; else From c40b4d604b3e39262f380deb09504a0022fcbc15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20T=C5=AFma?= Date: Tue, 18 Oct 2022 16:03:37 +0200 Subject: [PATCH 0629/1477] i2c: xiic: Add platform module alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b8caf0a0e04583fb71e21495bef84509182227ea ] The missing "platform" alias is required for the mgb4 v4l2 driver to load the i2c controller driver when probing the HW. 
Signed-off-by: Martin Tůma Acked-by: Michal Simek Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-xiic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 8dabb6ffb1a4..3b564e68130b 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -935,6 +935,7 @@ static struct platform_driver xiic_i2c_driver = { module_platform_driver(xiic_i2c_driver); +MODULE_ALIAS("platform:" DRIVER_NAME); MODULE_AUTHOR("info@mocean-labs.com"); MODULE_DESCRIPTION("Xilinx I2C bus driver"); MODULE_LICENSE("GPL v2"); From 160d8904b2b583ac7fb9344bfc68f651c88e9259 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Sat, 22 Oct 2022 08:23:52 -0700 Subject: [PATCH 0630/1477] efi/tpm: Pass correct address to memblock_reserve [ Upstream commit f4cd18c5b2000df0c382f6530eeca9141ea41faf ] memblock_reserve() expects a physical address, but the address being passed for the TPM final events log is what was returned from early_memremap(). This results in something like the following: [ 0.000000] memblock_reserve: [0xffffffffff2c0000-0xffffffffff2c00e4] efi_tpm_eventlog_init+0x324/0x370 Pass the address from efi like what is done for the TPM events log. 
Fixes: c46f3405692d ("tpm: Reserve the TPM final events table") Cc: Matthew Garrett Cc: Jarkko Sakkinen Cc: Bartosz Szczepanek Cc: Ard Biesheuvel Signed-off-by: Jerry Snitselaar Acked-by: Jarkko Sakkinen Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c index 8f665678e9e3..e8d69bd548f3 100644 --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -97,7 +97,7 @@ int __init efi_tpm_eventlog_init(void) goto out_calc; } - memblock_reserve((unsigned long)final_tbl, + memblock_reserve(efi.tpm_final_log, tbl_size + sizeof(*final_tbl)); efi_tpm_final_log_size = tbl_size; From d7b1e2cbe0a467b300409ccd9008515bc18dceea Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Thu, 29 Sep 2022 12:52:22 -0700 Subject: [PATCH 0631/1477] ARM: dts: imx6qdl-gw59{10,13}: fix user pushbutton GPIO offset [ Upstream commit bb5ad73941dc3f4e3c2241348f385da6501d50ea ] The GW5910 and GW5913 have a user pushbutton that is tied to the Gateworks System Controller GPIO offset 2. Fix the invalid offset of 0. 
Fixes: 64bf0a0af18d ("ARM: dts: imx6qdl-gw: add Gateworks System Controller support") Signed-off-by: Tim Harvey Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6qdl-gw5910.dtsi | 2 +- arch/arm/boot/dts/imx6qdl-gw5913.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-gw5910.dtsi b/arch/arm/boot/dts/imx6qdl-gw5910.dtsi index ed4e22259959..852601d5ab6b 100644 --- a/arch/arm/boot/dts/imx6qdl-gw5910.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw5910.dtsi @@ -31,7 +31,7 @@ user-pb { label = "user_pb"; - gpios = <&gsc_gpio 0 GPIO_ACTIVE_LOW>; + gpios = <&gsc_gpio 2 GPIO_ACTIVE_LOW>; linux,code = ; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw5913.dtsi b/arch/arm/boot/dts/imx6qdl-gw5913.dtsi index 4cd7d290f5b2..7a2628fdd142 100644 --- a/arch/arm/boot/dts/imx6qdl-gw5913.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw5913.dtsi @@ -28,7 +28,7 @@ user-pb { label = "user_pb"; - gpios = <&gsc_gpio 0 GPIO_ACTIVE_LOW>; + gpios = <&gsc_gpio 2 GPIO_ACTIVE_LOW>; linux,code = ; }; From 29e8e9bfc2f2db615f3dab42e0ba1bc73941b0c2 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:27 +0100 Subject: [PATCH 0632/1477] firmware: arm_scmi: Suppress the driver's bind attributes [ Upstream commit fd96fbc8fad35d6b1872c90df8a2f5d721f14d91 ] Suppress the capability to unbind the core SCMI driver since all the SCMI stack protocol drivers depend on it. 
Fixes: aa4f886f3893 ("firmware: arm_scmi: add basic driver infrastructure for SCMI") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-2-cristian.marussi@arm.com Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 745b7f9eb335..4bc974ead02b 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -942,6 +942,7 @@ MODULE_DEVICE_TABLE(of, scmi_of_match); static struct platform_driver scmi_driver = { .driver = { .name = "arm-scmi", + .suppress_bind_attrs = true, .of_match_table = scmi_of_match, .dev_groups = versions_groups, }, From b743ecf29ca7323406a20b322cffc3dac1c47468 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:29 +0100 Subject: [PATCH 0633/1477] firmware: arm_scmi: Make Rx chan_setup fail on memory errors [ Upstream commit be9ba1f7f9e0b565b19f4294f5871da9d654bc6d ] SCMI Rx channels are optional and they can fail to be setup when not present but anyway channels setup routines must bail-out on memory errors. Make channels setup, and related probing, fail when memory errors are reported on Rx channels. 
Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-4-cristian.marussi@arm.com Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/driver.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 4bc974ead02b..8d24082848a8 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -711,8 +711,12 @@ scmi_txrx_setup(struct scmi_info *info, struct device *dev, int prot_id) { int ret = scmi_chan_setup(info, dev, prot_id, true); - if (!ret) /* Rx is optional, hence no error check */ - scmi_chan_setup(info, dev, prot_id, false); + if (!ret) { + /* Rx is optional, report only memory errors */ + ret = scmi_chan_setup(info, dev, prot_id, false); + if (ret && ret != -ENOMEM) + ret = 0; + } return ret; } From fc3e2fa0a5fb432927fed3f2cdfda22c41340c16 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:33 +0100 Subject: [PATCH 0634/1477] arm64: dts: juno: Add thermal critical trip points [ Upstream commit c4a7b9b587ca1bb4678d48d8be7132492b23a81c ] When thermal zones are defined, trip point definitions are mandatory. Define a couple of critical trip points for monitoring of existing PMIC and SOC thermal zones.
This was lost between txt to yaml conversion and was re-enforced recently via the commit 8c596324232d ("dt-bindings: thermal: Fix missing required property") Cc: Rob Herring Cc: Krzysztof Kozlowski Cc: devicetree@vger.kernel.org Signed-off-by: Cristian Marussi Fixes: f7b636a8d83c ("arm64: dts: juno: add thermal zones for scpi sensors") Link: https://lore.kernel.org/r/20221028140833.280091-8-cristian.marussi@arm.com Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/arm/juno-base.dtsi | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index 2c0161125ece..cb45a2f0537a 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -595,12 +595,26 @@ polling-delay = <1000>; polling-delay-passive = <100>; thermal-sensors = <&scpi_sensors0 0>; + trips { + pmic_crit0: trip0 { + temperature = <90000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; soc { polling-delay = <1000>; polling-delay-passive = <100>; thermal-sensors = <&scpi_sensors0 3>; + trips { + soc_crit0: trip0 { + temperature = <80000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; big_cluster_thermal_zone: big-cluster { From bfd5e62f9a7ee214661cb6f143a3b40ccc63317f Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Thu, 27 Oct 2022 20:13:53 +0800 Subject: [PATCH 0635/1477] i2c: piix4: Fix adapter not be removed in piix4_remove() [ Upstream commit 569bea74c94d37785682b11bab76f557520477cd ] In piix4_probe(), the piix4 adapter will be registered in: piix4_probe() piix4_add_adapters_sb800() / piix4_add_adapter() i2c_add_adapter() Based on the probed device type, piix4_add_adapters_sb800() or single piix4_add_adapter() will be called. For the former case, piix4_adapter_count is set as the number of adapters, while for the other case it is not set and keeps its default of *zero*.
When piix4 is removed, piix4_remove() removes the adapters added in piix4_probe(), based on the piix4_adapter_count value. Because the count is zero for the single adapter case, the adapter won't be removed and the resources allocated for the adapter, such as the i2c client and device, are leaked. These resources can still be accessed by i2c or bus and cause problems. An easily reproduced case is that if a new adapter is registered, i2c will get the leaked adapter and try to call smbus_algorithm, which was already freed: Triggered by: rmmod i2c_piix4 && modprobe max31730 BUG: unable to handle page fault for address: ffffffffc053d860 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 3752 Comm: modprobe Tainted: G Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:i2c_default_probe (drivers/i2c/i2c-core-base.c:2259) i2c_core RSP: 0018:ffff888107477710 EFLAGS: 00000246 ... i2c_detect (drivers/i2c/i2c-core-base.c:2302) i2c_core __process_new_driver (drivers/i2c/i2c-core-base.c:1336) i2c_core bus_for_each_dev (drivers/base/bus.c:301) i2c_for_each_dev (drivers/i2c/i2c-core-base.c:1823) i2c_core i2c_register_driver (drivers/i2c/i2c-core-base.c:1861) i2c_core do_one_initcall (init/main.c:1296) do_init_module (kernel/module/main.c:2455) ... ---[ end trace 0000000000000000 ]--- Fix this problem by correctly setting piix4_adapter_count to 1 for the single adapter so it can be removed normally.
Fixes: 528d53a1592b ("i2c: piix4: Fix probing of reserved ports on AMD Family 16h Model 30h") Signed-off-by: Chen Zhongjin Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-piix4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 8c1b31ed0c42..aa1d3657ab4e 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -961,6 +961,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id) "", &piix4_main_adapters[0]); if (retval < 0) return retval; + piix4_adapter_count = 1; } /* Check for auxiliary SMBus on some AMD chipsets */ From 6b6f94fb9a74dd2891f11de4e638c6202bc89476 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 31 Oct 2022 16:10:32 -0700 Subject: [PATCH 0636/1477] Bluetooth: L2CAP: Fix accepting connection request for invalid SPSM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 711f8c3fb3db61897080468586b970c87c61d9e4 upstream. 
The Bluetooth spec states that the valid range for SPSM is from 0x0001-0x00ff so it is invalid to accept values outside of this range: BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A page 1059: Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges CVE: CVE-2022-42896 CC: stable@vger.kernel.org Reported-by: Tamás Koczka Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Tedd Ho-Jeong An Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 7f37e22d9242..0fad4bb69746 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5808,6 +5808,19 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm), scid, mtu, mps); + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A + * page 1059: + * + * Valid range: 0x0001-0x00ff + * + * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges + */ + if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { + result = L2CAP_CR_LE_BAD_PSM; + chan = NULL; + goto response; + } + /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); @@ -5988,6 +6001,18 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, psm = req->psm; + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A + * page 1059: + * + * Valid range: 0x0001-0x00ff + * + * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges + */ + if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { + result = L2CAP_CR_LE_BAD_PSM; + goto response; + } + BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); memset(&pdu, 0, sizeof(pdu)); From 26ca2ac091b49281d73df86111d16e5a76e43bd7 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 31 Oct 2022 16:10:52 -0700 Subject: [PATCH 0637/1477] 
Bluetooth: L2CAP: Fix attempting to access uninitialized memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b1a2cd50c0357f243b7435a732b4e62ba3157a2e upstream. On l2cap_parse_conf_req the variable efs is only initialized if remote_efs has been set. CVE: CVE-2022-42895 CC: stable@vger.kernel.org Reported-by: Tamás Koczka Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Tedd Ho-Jeong An Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0fad4bb69746..e69e96ef4927 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3760,7 +3760,8 @@ done: l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); - if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { + if (remote_efs && + test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->remote_id = efs.id; chan->remote_stype = efs.stype; chan->remote_msdu = le16_to_cpu(efs.msdu); From a0e2577cf3cca31e5ad1cbb3f5b68d5d2f5d83ee Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 13 May 2022 10:35:06 +0800 Subject: [PATCH 0638/1477] block, bfq: protect 'bfqd->queued' by 'bfqd->lock' commit 181490d5321806e537dc5386db5ea640b826bf78 upstream. If bfq_schedule_dispatch() is called from bfq_idle_slice_timer_body(), then 'bfqd->queued' is read without holding 'bfqd->lock'. This is wrong since it can be written concurrently. Fix the problem by holding 'bfqd->lock' in such a case.
Signed-off-by: Yu Kuai Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220513023507.2625717-2-yukuai3@huawei.com Signed-off-by: Jens Axboe Cc: Khazhy Kumykov Signed-off-by: Greg Kroah-Hartman --- block/bfq-iosched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 592d32a46c4c..7c4b8d0635eb 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -421,6 +421,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, */ void bfq_schedule_dispatch(struct bfq_data *bfqd) { + lockdep_assert_held(&bfqd->lock); + if (bfqd->queued != 0) { bfq_log(bfqd, "schedule dispatch"); blk_mq_run_hw_queues(bfqd->queue, true); @@ -6269,8 +6271,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_bfqq_expire(bfqd, bfqq, true, reason); schedule_dispatch: - spin_unlock_irqrestore(&bfqd->lock, flags); bfq_schedule_dispatch(bfqd); + spin_unlock_irqrestore(&bfqd->lock, flags); } /* From 54c13d3520ef5b57f5a11cac883fce05d9ce6b52 Mon Sep 17 00:00:00 2001 From: John Veness Date: Fri, 24 Jun 2022 15:07:57 +0100 Subject: [PATCH 0639/1477] ALSA: usb-audio: Add quirks for MacroSilicon MS2100/MS2106 devices commit 6e2c9105e0b743c92a157389d40f00b81bdd09fe upstream. Treat the claimed 96kHz 1ch in the descriptors as 48kHz 2ch, so that the audio stream doesn't sound mono. Also fix initial stream alignment, so that left and right channels are in the correct order. 
Signed-off-by: John Veness Link: https://lore.kernel.org/r/20220624140757.28758-1-john-linux@pelago.org.uk Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks-table.h | 52 ++++++++++++++++++++++++++++++++++++++++ sound/usb/quirks.c | 1 + 2 files changed, 53 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 1ac91c46da3c..a51591f68ae6 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3656,6 +3656,58 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), } }, +/* + * MacroSilicon MS2100/MS2106 based AV capture cards + * + * These claim 96kHz 1ch in the descriptors, but are actually 48kHz 2ch. + * They also need QUIRK_AUDIO_ALIGN_TRANSFER, which makes one wonder if + * they pretend to be 96kHz mono as a workaround for stereo being broken + * by that... + * + * They also have an issue with initial stream alignment that causes the + * channels to be swapped and out of phase, which is dealt with in quirks.c. 
+ */ +{ + USB_AUDIO_DEVICE(0x534d, 0x0021), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "MacroSilicon", + .product_name = "MS210x", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 2, + .type = QUIRK_AUDIO_ALIGN_TRANSFER, + }, + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_MIXER, + }, + { + .ifnum = 3, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels = 2, + .iface = 3, + .altsetting = 1, + .altset_idx = 1, + .attributes = 0, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_CONTINUOUS, + .rate_min = 48000, + .rate_max = 48000, + } + }, + { + .ifnum = -1 + } + } + } +}, + /* * MacroSilicon MS2109 based HDMI capture cards * diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 41f5d8242478..04a691bc560c 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1508,6 +1508,7 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs, case USB_ID(0x2b73, 0x0017): /* Pioneer DJ DJM-250MK2 */ pioneer_djm_set_format_quirk(subs); break; + case USB_ID(0x534d, 0x0021): /* MacroSilicon MS2100/MS2106 */ case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */ subs->stream_offset_adj = 2; break; From 092401142b959de39ba3db8425cffd9b30f7dea9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 4 Nov 2022 16:37:58 -0700 Subject: [PATCH 0640/1477] fscrypt: simplify master key locking commit 4a4b8721f1a5e4b01e45b3153c68d5a1014b25de upstream. The stated reasons for separating fscrypt_master_key::mk_secret_sem from the standard semaphore contained in every 'struct key' no longer apply. First, due to commit a992b20cd4ee ("fscrypt: add fscrypt_prepare_new_inode() and fscrypt_set_context()"), fscrypt_get_encryption_info() is no longer called from within a filesystem transaction. 
Second, due to commit d3ec10aa9581 ("KEYS: Don't write out to userspace while holding key semaphore"), the semaphore for the "keyring" key type no longer ranks above page faults. That leaves performance as the only possible reason to keep the separate mk_secret_sem. Specifically, having mk_secret_sem reduces the contention between setup_file_encryption_key() and FS_IOC_{ADD,REMOVE}_ENCRYPTION_KEY. However, these ioctls aren't executed often, so this doesn't seem to be worth the extra complexity. Therefore, simplify the locking design by just using key->sem instead of mk_secret_sem. Link: https://lore.kernel.org/r/20201117032626.320275-1-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/crypto/fscrypt_private.h | 19 ++++++------------- fs/crypto/hooks.c | 8 +++++--- fs/crypto/keyring.c | 8 +------- fs/crypto/keysetup.c | 20 +++++++++----------- 4 files changed, 21 insertions(+), 34 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 052ad40ecdb2..8a0141f7195b 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -439,16 +439,9 @@ struct fscrypt_master_key { * FS_IOC_REMOVE_ENCRYPTION_KEY can be retried, or * FS_IOC_ADD_ENCRYPTION_KEY can add the secret again. * - * Locking: protected by key->sem (outer) and mk_secret_sem (inner). - * The reason for two locks is that key->sem also protects modifying - * mk_users, which ranks it above the semaphore for the keyring key - * type, which is in turn above page faults (via keyring_read). But - * sometimes filesystems call fscrypt_get_encryption_info() from within - * a transaction, which ranks it below page faults. So we need a - * separate lock which protects mk_secret but not also mk_users. + * Locking: protected by this master key's key->sem. 
*/ struct fscrypt_master_key_secret mk_secret; - struct rw_semaphore mk_secret_sem; /* * For v1 policy keys: an arbitrary key descriptor which was assigned by @@ -467,8 +460,8 @@ struct fscrypt_master_key { * * This is NULL for v1 policy keys; those can only be added by root. * - * Locking: in addition to this keyrings own semaphore, this is - * protected by the master key's key->sem, so we can do atomic + * Locking: in addition to this keyring's own semaphore, this is + * protected by this master key's key->sem, so we can do atomic * search+insert. It can also be searched without taking any locks, but * in that case the returned key may have already been removed. */ @@ -510,9 +503,9 @@ is_master_key_secret_present(const struct fscrypt_master_key_secret *secret) /* * The READ_ONCE() is only necessary for fscrypt_drop_inode() and * fscrypt_key_describe(). These run in atomic context, so they can't - * take ->mk_secret_sem and thus 'secret' can change concurrently which - * would be a data race. But they only need to know whether the secret - * *was* present at the time of check, so READ_ONCE() suffices. + * take the key semaphore and thus 'secret' can change concurrently + * which would be a data race. But they only need to know whether the + * secret *was* present at the time of check, so READ_ONCE() suffices. 
*/ return READ_ONCE(secret->size) != 0; } diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 4180371bf864..0c6fa5c2d6f3 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -139,6 +139,7 @@ int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) { struct fscrypt_info *ci; + struct key *key; struct fscrypt_master_key *mk; int err; @@ -154,13 +155,14 @@ int fscrypt_prepare_setflags(struct inode *inode, ci = inode->i_crypt_info; if (ci->ci_policy.version != FSCRYPT_POLICY_V2) return -EINVAL; - mk = ci->ci_master_key->payload.data[0]; - down_read(&mk->mk_secret_sem); + key = ci->ci_master_key; + mk = key->payload.data[0]; + down_read(&key->sem); if (is_master_key_secret_present(&mk->mk_secret)) err = fscrypt_derive_dirhash_key(ci, mk); else err = -ENOKEY; - up_read(&mk->mk_secret_sem); + up_read(&key->sem); return err; } return 0; diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index d7ec52cb3d9a..0b3ffbb4faf4 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -347,7 +347,6 @@ static int add_new_master_key(struct fscrypt_master_key_secret *secret, mk->mk_spec = *mk_spec; move_master_key_secret(&mk->mk_secret, secret); - init_rwsem(&mk->mk_secret_sem); refcount_set(&mk->mk_refcount, 1); /* secret is present */ INIT_LIST_HEAD(&mk->mk_decrypted_inodes); @@ -427,11 +426,8 @@ static int add_existing_master_key(struct fscrypt_master_key *mk, } /* Re-add the secret if needed. */ - if (rekey) { - down_write(&mk->mk_secret_sem); + if (rekey) move_master_key_secret(&mk->mk_secret, secret); - up_write(&mk->mk_secret_sem); - } return 0; } @@ -975,10 +971,8 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) /* No user claims remaining. Go ahead and wipe the secret. 
*/ dead = false; if (is_master_key_secret_present(&mk->mk_secret)) { - down_write(&mk->mk_secret_sem); wipe_master_key_secret(&mk->mk_secret); dead = refcount_dec_and_test(&mk->mk_refcount); - up_write(&mk->mk_secret_sem); } up_write(&key->sem); if (dead) { diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 73d96e35d9ae..72aec33e0ea5 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -405,11 +405,11 @@ static bool fscrypt_valid_master_key_size(const struct fscrypt_master_key *mk, * Find the master key, then set up the inode's actual encryption key. * * If the master key is found in the filesystem-level keyring, then the - * corresponding 'struct key' is returned in *master_key_ret with - * ->mk_secret_sem read-locked. This is needed to ensure that only one task - * links the fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race - * to create an fscrypt_info for the same inode), and to synchronize the master - * key being removed with a new inode starting to use it. + * corresponding 'struct key' is returned in *master_key_ret with its semaphore + * read-locked. This is needed to ensure that only one task links the + * fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race to create + * an fscrypt_info for the same inode), and to synchronize the master key being + * removed with a new inode starting to use it. */ static int setup_file_encryption_key(struct fscrypt_info *ci, bool need_dirhash_key, @@ -458,7 +458,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, } mk = key->payload.data[0]; - down_read(&mk->mk_secret_sem); + down_read(&key->sem); /* Has the secret been removed (via FS_IOC_REMOVE_ENCRYPTION_KEY)? 
*/ if (!is_master_key_secret_present(&mk->mk_secret)) { @@ -490,7 +490,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, return 0; out_release_key: - up_read(&mk->mk_secret_sem); + up_read(&key->sem); key_put(key); return err; } @@ -593,9 +593,7 @@ fscrypt_setup_encryption_info(struct inode *inode, res = 0; out: if (master_key) { - struct fscrypt_master_key *mk = master_key->payload.data[0]; - - up_read(&mk->mk_secret_sem); + up_read(&master_key->sem); key_put(master_key); } put_crypt_info(crypt_info); @@ -769,7 +767,7 @@ int fscrypt_drop_inode(struct inode *inode) return 0; /* - * Note: since we aren't holding ->mk_secret_sem, the result here can + * Note: since we aren't holding the key semaphore, the result here can * immediately become outdated. But there's no correctness problem with * unnecessarily evicting. Nor is there a correctness problem with not * evicting while iput() is racing with the key being removed, since From 391cceee6d435e616f68631e68f5b32d480b1e67 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 4 Nov 2022 16:37:59 -0700 Subject: [PATCH 0641/1477] fscrypt: stop using keyrings subsystem for fscrypt_master_key commit d7e7b9af104c7b389a0c21eb26532511bce4b510 upstream. The approach of fs/crypto/ internally managing the fscrypt_master_key structs as the payloads of "struct key" objects contained in a "struct key" keyring has outlived its usefulness. The original idea was to simplify the code by reusing code from the keyrings subsystem. However, several issues have arisen that can't easily be resolved: - When a master key struct is destroyed, blk_crypto_evict_key() must be called on any per-mode keys embedded in it. (This started being the case when inline encryption support was added.) Yet, the keyrings subsystem can arbitrarily delay the destruction of keys, even past the time the filesystem was unmounted. Therefore, currently there is no easy way to call blk_crypto_evict_key() when a master key is destroyed. 
Currently, this is worked around by holding an extra reference to the filesystem's request_queue(s). But it was overlooked that the request_queue reference is *not* guaranteed to pin the corresponding blk_crypto_profile too; for device-mapper devices that support inline crypto, it doesn't. This can cause a use-after-free. - When the last inode that was using an incompletely-removed master key is evicted, the master key removal is completed by removing the key struct from the keyring. Currently this is done via key_invalidate(). Yet, key_invalidate() takes the key semaphore. This can deadlock when called from the shrinker, since in fscrypt_ioctl_add_key(), memory is allocated with GFP_KERNEL under the same semaphore. - More generally, the fact that the keyrings subsystem can arbitrarily delay the destruction of keys (via garbage collection delay, or via random processes getting temporary key references) is undesirable, as it means we can't strictly guarantee that all secrets are ever wiped. - Doing the master key lookups via the keyrings subsystem results in the key_permission LSM hook being called. fscrypt doesn't want this, as all access control for encrypted files is designed to happen via the files themselves, like any other files. The workaround which SELinux users are using is to change their SELinux policy to grant key search access to all domains. This works, but it is an odd extra step that shouldn't really have to be done. The fix for all these issues is to change the implementation to what I should have done originally: don't use the keyrings subsystem to keep track of the filesystem's fscrypt_master_key structs. Instead, just store them in a regular kernel data structure, and rework the reference counting, locking, and lifetime accordingly. Retain support for RCU-mode key lookups by using a hash table. 
Replace fscrypt_sb_free() with fscrypt_sb_delete(), which releases the keys synchronously and runs a bit earlier during unmount, so that block devices are still available. A side effect of this patch is that neither the master keys themselves nor the filesystem keyrings will be listed in /proc/keys anymore. ("Master key users" and the master key users keyrings will still be listed.) However, this was mostly an implementation detail, and it was intended just for debugging purposes. I don't know of anyone using it. This patch does *not* change how "master key users" (->mk_users) works; that still uses the keyrings subsystem. That is still needed for key quotas, and changing that isn't necessary to solve the issues listed above. If we decide to change that too, it would be a separate patch. I've marked this as fixing the original commit that added the fscrypt keyring, but as noted above the most important issue that this patch fixes wasn't introduced until the addition of inline encryption support. Fixes: 22d94f493bfb ("fscrypt: add FS_IOC_ADD_ENCRYPTION_KEY ioctl") Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20220901193208.138056-2-ebiggers@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/crypto/fscrypt_private.h | 71 ++++-- fs/crypto/hooks.c | 10 +- fs/crypto/keyring.c | 488 +++++++++++++++++++----------------- fs/crypto/keysetup.c | 81 +++--- fs/crypto/policy.c | 8 +- fs/super.c | 2 +- include/linux/fs.h | 2 +- include/linux/fscrypt.h | 4 +- 8 files changed, 354 insertions(+), 312 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 8a0141f7195b..b746d7df3758 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -220,7 +220,7 @@ struct fscrypt_info { * will be NULL if the master key was found in a process-subscribed * keyring rather than in the filesystem-level keyring. 
*/ - struct key *ci_master_key; + struct fscrypt_master_key *ci_master_key; /* * Link in list of inodes that were unlocked with the master key. @@ -431,6 +431,40 @@ struct fscrypt_master_key_secret { */ struct fscrypt_master_key { + /* + * Back-pointer to the super_block of the filesystem to which this + * master key has been added. Only valid if ->mk_active_refs > 0. + */ + struct super_block *mk_sb; + + /* + * Link in ->mk_sb->s_master_keys->key_hashtable. + * Only valid if ->mk_active_refs > 0. + */ + struct hlist_node mk_node; + + /* Semaphore that protects ->mk_secret and ->mk_users */ + struct rw_semaphore mk_sem; + + /* + * Active and structural reference counts. An active ref guarantees + * that the struct continues to exist, continues to be in the keyring + * ->mk_sb->s_master_keys, and that any embedded subkeys (e.g. + * ->mk_direct_keys) that have been prepared continue to exist. + * A structural ref only guarantees that the struct continues to exist. + * + * There is one active ref associated with ->mk_secret being present, + * and one active ref for each inode in ->mk_decrypted_inodes. + * + * There is one structural ref associated with the active refcount being + * nonzero. Finding a key in the keyring also takes a structural ref, + * which is then held temporarily while the key is operated on. + */ + refcount_t mk_active_refs; + refcount_t mk_struct_refs; + + struct rcu_head mk_rcu_head; + /* * The secret key material. After FS_IOC_REMOVE_ENCRYPTION_KEY is * executed, this is wiped and no new inodes can be unlocked with this @@ -439,7 +473,10 @@ struct fscrypt_master_key { * FS_IOC_REMOVE_ENCRYPTION_KEY can be retried, or * FS_IOC_ADD_ENCRYPTION_KEY can add the secret again. * - * Locking: protected by this master key's key->sem. + * While ->mk_secret is present, one ref in ->mk_active_refs is held. + * + * Locking: protected by ->mk_sem. The manipulation of ->mk_active_refs + * associated with this field is protected by ->mk_sem as well. 
*/ struct fscrypt_master_key_secret mk_secret; @@ -460,22 +497,12 @@ struct fscrypt_master_key { * * This is NULL for v1 policy keys; those can only be added by root. * - * Locking: in addition to this keyring's own semaphore, this is - * protected by this master key's key->sem, so we can do atomic - * search+insert. It can also be searched without taking any locks, but - * in that case the returned key may have already been removed. + * Locking: protected by ->mk_sem. (We don't just rely on the keyrings + * subsystem semaphore ->mk_users->sem, as we need support for atomic + * search+insert along with proper synchronization with ->mk_secret.) */ struct key *mk_users; - /* - * Length of ->mk_decrypted_inodes, plus one if mk_secret is present. - * Once this goes to 0, the master key is removed from ->s_master_keys. - * The 'struct fscrypt_master_key' will continue to live as long as the - * 'struct key' whose payload it is, but we won't let this reference - * count rise again. - */ - refcount_t mk_refcount; - /* * List of inodes that were unlocked using this key. This allows the * inodes to be evicted efficiently if the key is removed. @@ -501,10 +528,10 @@ static inline bool is_master_key_secret_present(const struct fscrypt_master_key_secret *secret) { /* - * The READ_ONCE() is only necessary for fscrypt_drop_inode() and - * fscrypt_key_describe(). These run in atomic context, so they can't - * take the key semaphore and thus 'secret' can change concurrently - * which would be a data race. But they only need to know whether the + * The READ_ONCE() is only necessary for fscrypt_drop_inode(). + * fscrypt_drop_inode() runs in atomic context, so it can't take the key + * semaphore and thus 'secret' can change concurrently which would be a + * data race. But fscrypt_drop_inode() only need to know whether the * secret *was* present at the time of check, so READ_ONCE() suffices. 
*/ return READ_ONCE(secret->size) != 0; @@ -533,7 +560,11 @@ static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec) return 0; } -struct key * +void fscrypt_put_master_key(struct fscrypt_master_key *mk); + +void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk); + +struct fscrypt_master_key * fscrypt_find_master_key(struct super_block *sb, const struct fscrypt_key_specifier *mk_spec); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 0c6fa5c2d6f3..8268206ef21e 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -5,8 +5,6 @@ * Encryption hooks for higher-level filesystem operations. */ -#include - #include "fscrypt_private.h" /** @@ -139,7 +137,6 @@ int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) { struct fscrypt_info *ci; - struct key *key; struct fscrypt_master_key *mk; int err; @@ -155,14 +152,13 @@ int fscrypt_prepare_setflags(struct inode *inode, ci = inode->i_crypt_info; if (ci->ci_policy.version != FSCRYPT_POLICY_V2) return -EINVAL; - key = ci->ci_master_key; - mk = key->payload.data[0]; - down_read(&key->sem); + mk = ci->ci_master_key; + down_read(&mk->mk_sem); if (is_master_key_secret_present(&mk->mk_secret)) err = fscrypt_derive_dirhash_key(ci, mk); else err = -ENOKEY; - up_read(&key->sem); + up_read(&mk->mk_sem); return err; } return 0; diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 0b3ffbb4faf4..175b071beaf8 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -18,6 +18,7 @@ * information about these ioctls. */ +#include #include #include #include @@ -25,6 +26,18 @@ #include "fscrypt_private.h" +/* The master encryption keys for a filesystem (->s_master_keys) */ +struct fscrypt_keyring { + /* + * Lock that protects ->key_hashtable. It does *not* protect the + * fscrypt_master_key structs themselves. 
+ */ + spinlock_t lock; + + /* Hash table that maps fscrypt_key_specifier to fscrypt_master_key */ + struct hlist_head key_hashtable[128]; +}; + static void wipe_master_key_secret(struct fscrypt_master_key_secret *secret) { fscrypt_destroy_hkdf(&secret->hkdf); @@ -38,20 +51,70 @@ static void move_master_key_secret(struct fscrypt_master_key_secret *dst, memzero_explicit(src, sizeof(*src)); } -static void free_master_key(struct fscrypt_master_key *mk) +static void fscrypt_free_master_key(struct rcu_head *head) { + struct fscrypt_master_key *mk = + container_of(head, struct fscrypt_master_key, mk_rcu_head); + /* + * The master key secret and any embedded subkeys should have already + * been wiped when the last active reference to the fscrypt_master_key + * struct was dropped; doing it here would be unnecessarily late. + * Nevertheless, use kfree_sensitive() in case anything was missed. + */ + kfree_sensitive(mk); +} + +void fscrypt_put_master_key(struct fscrypt_master_key *mk) +{ + if (!refcount_dec_and_test(&mk->mk_struct_refs)) + return; + /* + * No structural references left, so free ->mk_users, and also free the + * fscrypt_master_key struct itself after an RCU grace period ensures + * that concurrent keyring lookups can no longer find it. + */ + WARN_ON(refcount_read(&mk->mk_active_refs) != 0); + key_put(mk->mk_users); + mk->mk_users = NULL; + call_rcu(&mk->mk_rcu_head, fscrypt_free_master_key); +} + +void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk) +{ + struct super_block *sb = mk->mk_sb; + struct fscrypt_keyring *keyring = sb->s_master_keys; size_t i; - wipe_master_key_secret(&mk->mk_secret); + if (!refcount_dec_and_test(&mk->mk_active_refs)) + return; + /* + * No active references left, so complete the full removal of this + * fscrypt_master_key struct by removing it from the keyring and + * destroying any subkeys embedded in it. 
+ */ + + spin_lock(&keyring->lock); + hlist_del_rcu(&mk->mk_node); + spin_unlock(&keyring->lock); + + /* + * ->mk_active_refs == 0 implies that ->mk_secret is not present and + * that ->mk_decrypted_inodes is empty. + */ + WARN_ON(is_master_key_secret_present(&mk->mk_secret)); + WARN_ON(!list_empty(&mk->mk_decrypted_inodes)); for (i = 0; i <= FSCRYPT_MODE_MAX; i++) { fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]); fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]); fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_32_keys[i]); } + memzero_explicit(&mk->mk_ino_hash_key, + sizeof(mk->mk_ino_hash_key)); + mk->mk_ino_hash_key_initialized = false; - key_put(mk->mk_users); - kfree_sensitive(mk); + /* Drop the structural ref associated with the active refs. */ + fscrypt_put_master_key(mk); } static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec) @@ -61,44 +124,6 @@ static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec) return master_key_spec_len(spec) != 0; } -static int fscrypt_key_instantiate(struct key *key, - struct key_preparsed_payload *prep) -{ - key->payload.data[0] = (struct fscrypt_master_key *)prep->data; - return 0; -} - -static void fscrypt_key_destroy(struct key *key) -{ - free_master_key(key->payload.data[0]); -} - -static void fscrypt_key_describe(const struct key *key, struct seq_file *m) -{ - seq_puts(m, key->description); - - if (key_is_positive(key)) { - const struct fscrypt_master_key *mk = key->payload.data[0]; - - if (!is_master_key_secret_present(&mk->mk_secret)) - seq_puts(m, ": secret removed"); - } -} - -/* - * Type of key in ->s_master_keys. Each key of this type represents a master - * key which has been added to the filesystem. Its payload is a - * 'struct fscrypt_master_key'. The "." prefix in the key type name prevents - * users from adding keys of this type via the keyrings syscalls rather than via - * the intended method of FS_IOC_ADD_ENCRYPTION_KEY. 
- */ -static struct key_type key_type_fscrypt = { - .name = "._fscrypt", - .instantiate = fscrypt_key_instantiate, - .destroy = fscrypt_key_destroy, - .describe = fscrypt_key_describe, -}; - static int fscrypt_user_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { @@ -131,32 +156,6 @@ static struct key_type key_type_fscrypt_user = { .describe = fscrypt_user_key_describe, }; -/* Search ->s_master_keys or ->mk_users */ -static struct key *search_fscrypt_keyring(struct key *keyring, - struct key_type *type, - const char *description) -{ - /* - * We need to mark the keyring reference as "possessed" so that we - * acquire permission to search it, via the KEY_POS_SEARCH permission. - */ - key_ref_t keyref = make_key_ref(keyring, true /* possessed */); - - keyref = keyring_search(keyref, type, description, false); - if (IS_ERR(keyref)) { - if (PTR_ERR(keyref) == -EAGAIN || /* not found */ - PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */ - keyref = ERR_PTR(-ENOKEY); - return ERR_CAST(keyref); - } - return key_ref_to_ptr(keyref); -} - -#define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE \ - (CONST_STRLEN("fscrypt-") + sizeof_field(struct super_block, s_id)) - -#define FSCRYPT_MK_DESCRIPTION_SIZE (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1) - #define FSCRYPT_MK_USERS_DESCRIPTION_SIZE \ (CONST_STRLEN("fscrypt-") + 2 * FSCRYPT_KEY_IDENTIFIER_SIZE + \ CONST_STRLEN("-users") + 1) @@ -164,21 +163,6 @@ static struct key *search_fscrypt_keyring(struct key *keyring, #define FSCRYPT_MK_USER_DESCRIPTION_SIZE \ (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + CONST_STRLEN(".uid.") + 10 + 1) -static void format_fs_keyring_description( - char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE], - const struct super_block *sb) -{ - sprintf(description, "fscrypt-%s", sb->s_id); -} - -static void format_mk_description( - char description[FSCRYPT_MK_DESCRIPTION_SIZE], - const struct fscrypt_key_specifier *mk_spec) -{ - sprintf(description, "%*phN", - master_key_spec_len(mk_spec), (u8 
*)&mk_spec->u); -} - static void format_mk_users_keyring_description( char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE], const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) @@ -199,20 +183,15 @@ static void format_mk_user_description( /* Create ->s_master_keys if needed. Synchronized by fscrypt_add_key_mutex. */ static int allocate_filesystem_keyring(struct super_block *sb) { - char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE]; - struct key *keyring; + struct fscrypt_keyring *keyring; if (sb->s_master_keys) return 0; - format_fs_keyring_description(description, sb); - keyring = keyring_alloc(description, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, - current_cred(), KEY_POS_SEARCH | - KEY_USR_SEARCH | KEY_USR_READ | KEY_USR_VIEW, - KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); - if (IS_ERR(keyring)) - return PTR_ERR(keyring); - + keyring = kzalloc(sizeof(*keyring), GFP_KERNEL); + if (!keyring) + return -ENOMEM; + spin_lock_init(&keyring->lock); /* * Pairs with the smp_load_acquire() in fscrypt_find_master_key(). * I.e., here we publish ->s_master_keys with a RELEASE barrier so that @@ -222,21 +201,75 @@ static int allocate_filesystem_keyring(struct super_block *sb) return 0; } -void fscrypt_sb_free(struct super_block *sb) +/* + * This is called at unmount time to release all encryption keys that have been + * added to the filesystem, along with the keyring that contains them. + * + * Note that besides clearing and freeing memory, this might need to evict keys + * from the keyslots of an inline crypto engine. Therefore, this must be called + * while the filesystem's underlying block device(s) are still available. 
+ */ +void fscrypt_sb_delete(struct super_block *sb) { - key_put(sb->s_master_keys); + struct fscrypt_keyring *keyring = sb->s_master_keys; + size_t i; + + if (!keyring) + return; + + for (i = 0; i < ARRAY_SIZE(keyring->key_hashtable); i++) { + struct hlist_head *bucket = &keyring->key_hashtable[i]; + struct fscrypt_master_key *mk; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(mk, tmp, bucket, mk_node) { + /* + * Since all inodes were already evicted, every key + * remaining in the keyring should have an empty inode + * list, and should only still be in the keyring due to + * the single active ref associated with ->mk_secret. + * There should be no structural refs beyond the one + * associated with the active ref. + */ + WARN_ON(refcount_read(&mk->mk_active_refs) != 1); + WARN_ON(refcount_read(&mk->mk_struct_refs) != 1); + WARN_ON(!is_master_key_secret_present(&mk->mk_secret)); + wipe_master_key_secret(&mk->mk_secret); + fscrypt_put_master_key_activeref(mk); + } + } + kfree_sensitive(keyring); sb->s_master_keys = NULL; } -/* - * Find the specified master key in ->s_master_keys. - * Returns ERR_PTR(-ENOKEY) if not found. - */ -struct key *fscrypt_find_master_key(struct super_block *sb, - const struct fscrypt_key_specifier *mk_spec) +static struct hlist_head * +fscrypt_mk_hash_bucket(struct fscrypt_keyring *keyring, + const struct fscrypt_key_specifier *mk_spec) { - struct key *keyring; - char description[FSCRYPT_MK_DESCRIPTION_SIZE]; + /* + * Since key specifiers should be "random" values, it is sufficient to + * use a trivial hash function that just takes the first several bits of + * the key specifier. + */ + unsigned long i = get_unaligned((unsigned long *)&mk_spec->u); + + return &keyring->key_hashtable[i % ARRAY_SIZE(keyring->key_hashtable)]; +} + +/* + * Find the specified master key struct in ->s_master_keys and take a structural + * ref to it. 
The structural ref guarantees that the key struct continues to + * exist, but it does *not* guarantee that ->s_master_keys continues to contain + * the key struct. The structural ref needs to be dropped by + * fscrypt_put_master_key(). Returns NULL if the key struct is not found. + */ +struct fscrypt_master_key * +fscrypt_find_master_key(struct super_block *sb, + const struct fscrypt_key_specifier *mk_spec) +{ + struct fscrypt_keyring *keyring; + struct hlist_head *bucket; + struct fscrypt_master_key *mk; /* * Pairs with the smp_store_release() in allocate_filesystem_keyring(). @@ -246,10 +279,38 @@ struct key *fscrypt_find_master_key(struct super_block *sb, */ keyring = smp_load_acquire(&sb->s_master_keys); if (keyring == NULL) - return ERR_PTR(-ENOKEY); /* No keyring yet, so no keys yet. */ + return NULL; /* No keyring yet, so no keys yet. */ - format_mk_description(description, mk_spec); - return search_fscrypt_keyring(keyring, &key_type_fscrypt, description); + bucket = fscrypt_mk_hash_bucket(keyring, mk_spec); + rcu_read_lock(); + switch (mk_spec->type) { + case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: + hlist_for_each_entry_rcu(mk, bucket, mk_node) { + if (mk->mk_spec.type == + FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && + memcmp(mk->mk_spec.u.descriptor, + mk_spec->u.descriptor, + FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 && + refcount_inc_not_zero(&mk->mk_struct_refs)) + goto out; + } + break; + case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER: + hlist_for_each_entry_rcu(mk, bucket, mk_node) { + if (mk->mk_spec.type == + FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER && + memcmp(mk->mk_spec.u.identifier, + mk_spec->u.identifier, + FSCRYPT_KEY_IDENTIFIER_SIZE) == 0 && + refcount_inc_not_zero(&mk->mk_struct_refs)) + goto out; + } + break; + } + mk = NULL; +out: + rcu_read_unlock(); + return mk; } static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk) @@ -277,17 +338,30 @@ static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk) static struct key 
*find_master_key_user(struct fscrypt_master_key *mk) { char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE]; + key_ref_t keyref; format_mk_user_description(description, mk->mk_spec.u.identifier); - return search_fscrypt_keyring(mk->mk_users, &key_type_fscrypt_user, - description); + + /* + * We need to mark the keyring reference as "possessed" so that we + * acquire permission to search it, via the KEY_POS_SEARCH permission. + */ + keyref = keyring_search(make_key_ref(mk->mk_users, true /*possessed*/), + &key_type_fscrypt_user, description, false); + if (IS_ERR(keyref)) { + if (PTR_ERR(keyref) == -EAGAIN || /* not found */ + PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */ + keyref = ERR_PTR(-ENOKEY); + return ERR_CAST(keyref); + } + return key_ref_to_ptr(keyref); } /* * Give the current user a "key" in ->mk_users. This charges the user's quota * and marks the master key as added by the current user, so that it cannot be - * removed by another user with the key. Either the master key's key->sem must - * be held for write, or the master key must be still undergoing initialization. + * removed by another user with the key. Either ->mk_sem must be held for + * write, or the master key must be still undergoing initialization. */ static int add_master_key_user(struct fscrypt_master_key *mk) { @@ -309,7 +383,7 @@ static int add_master_key_user(struct fscrypt_master_key *mk) /* * Remove the current user's "key" from ->mk_users. - * The master key's key->sem must be held for write. + * ->mk_sem must be held for write. * * Returns 0 if removed, -ENOKEY if not found, or another -errno code. */ @@ -327,63 +401,49 @@ static int remove_master_key_user(struct fscrypt_master_key *mk) } /* - * Allocate a new fscrypt_master_key which contains the given secret, set it as - * the payload of a new 'struct key' of type fscrypt, and link the 'struct key' - * into the given keyring. Synchronized by fscrypt_add_key_mutex. 
+ * Allocate a new fscrypt_master_key, transfer the given secret over to it, and + * insert it into sb->s_master_keys. */ -static int add_new_master_key(struct fscrypt_master_key_secret *secret, - const struct fscrypt_key_specifier *mk_spec, - struct key *keyring) +static int add_new_master_key(struct super_block *sb, + struct fscrypt_master_key_secret *secret, + const struct fscrypt_key_specifier *mk_spec) { + struct fscrypt_keyring *keyring = sb->s_master_keys; struct fscrypt_master_key *mk; - char description[FSCRYPT_MK_DESCRIPTION_SIZE]; - struct key *key; int err; mk = kzalloc(sizeof(*mk), GFP_KERNEL); if (!mk) return -ENOMEM; + mk->mk_sb = sb; + init_rwsem(&mk->mk_sem); + refcount_set(&mk->mk_struct_refs, 1); mk->mk_spec = *mk_spec; - move_master_key_secret(&mk->mk_secret, secret); - - refcount_set(&mk->mk_refcount, 1); /* secret is present */ INIT_LIST_HEAD(&mk->mk_decrypted_inodes); spin_lock_init(&mk->mk_decrypted_inodes_lock); if (mk_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) { err = allocate_master_key_users_keyring(mk); if (err) - goto out_free_mk; + goto out_put; err = add_master_key_user(mk); if (err) - goto out_free_mk; + goto out_put; } - /* - * Note that we don't charge this key to anyone's quota, since when - * ->mk_users is in use those keys are charged instead, and otherwise - * (when ->mk_users isn't in use) only root can add these keys. 
- */ - format_mk_description(description, mk_spec); - key = key_alloc(&key_type_fscrypt, description, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(), - KEY_POS_SEARCH | KEY_USR_SEARCH | KEY_USR_VIEW, - KEY_ALLOC_NOT_IN_QUOTA, NULL); - if (IS_ERR(key)) { - err = PTR_ERR(key); - goto out_free_mk; - } - err = key_instantiate_and_link(key, mk, sizeof(*mk), keyring, NULL); - key_put(key); - if (err) - goto out_free_mk; + move_master_key_secret(&mk->mk_secret, secret); + refcount_set(&mk->mk_active_refs, 1); /* ->mk_secret is present */ + spin_lock(&keyring->lock); + hlist_add_head_rcu(&mk->mk_node, + fscrypt_mk_hash_bucket(keyring, mk_spec)); + spin_unlock(&keyring->lock); return 0; -out_free_mk: - free_master_key(mk); +out_put: + fscrypt_put_master_key(mk); return err; } @@ -392,42 +452,34 @@ out_free_mk: static int add_existing_master_key(struct fscrypt_master_key *mk, struct fscrypt_master_key_secret *secret) { - struct key *mk_user; - bool rekey; int err; /* * If the current user is already in ->mk_users, then there's nothing to - * do. (Not applicable for v1 policy keys, which have NULL ->mk_users.) + * do. Otherwise, we need to add the user to ->mk_users. (Neither is + * applicable for v1 policy keys, which have NULL ->mk_users.) */ if (mk->mk_users) { - mk_user = find_master_key_user(mk); + struct key *mk_user = find_master_key_user(mk); + if (mk_user != ERR_PTR(-ENOKEY)) { if (IS_ERR(mk_user)) return PTR_ERR(mk_user); key_put(mk_user); return 0; } - } - - /* If we'll be re-adding ->mk_secret, try to take the reference. */ - rekey = !is_master_key_secret_present(&mk->mk_secret); - if (rekey && !refcount_inc_not_zero(&mk->mk_refcount)) - return KEY_DEAD; - - /* Add the current user to ->mk_users, if applicable. */ - if (mk->mk_users) { err = add_master_key_user(mk); - if (err) { - if (rekey && refcount_dec_and_test(&mk->mk_refcount)) - return KEY_DEAD; + if (err) return err; - } } /* Re-add the secret if needed. 
*/ - if (rekey) + if (!is_master_key_secret_present(&mk->mk_secret)) { + if (!refcount_inc_not_zero(&mk->mk_active_refs)) + return KEY_DEAD; move_master_key_secret(&mk->mk_secret, secret); + } + return 0; } @@ -436,38 +488,36 @@ static int do_add_master_key(struct super_block *sb, const struct fscrypt_key_specifier *mk_spec) { static DEFINE_MUTEX(fscrypt_add_key_mutex); - struct key *key; + struct fscrypt_master_key *mk; int err; mutex_lock(&fscrypt_add_key_mutex); /* serialize find + link */ -retry: - key = fscrypt_find_master_key(sb, mk_spec); - if (IS_ERR(key)) { - err = PTR_ERR(key); - if (err != -ENOKEY) - goto out_unlock; + + mk = fscrypt_find_master_key(sb, mk_spec); + if (!mk) { /* Didn't find the key in ->s_master_keys. Add it. */ err = allocate_filesystem_keyring(sb); - if (err) - goto out_unlock; - err = add_new_master_key(secret, mk_spec, sb->s_master_keys); + if (!err) + err = add_new_master_key(sb, secret, mk_spec); } else { /* * Found the key in ->s_master_keys. Re-add the secret if * needed, and add the user to ->mk_users if needed. */ - down_write(&key->sem); - err = add_existing_master_key(key->payload.data[0], secret); - up_write(&key->sem); + down_write(&mk->mk_sem); + err = add_existing_master_key(mk, secret); + up_write(&mk->mk_sem); if (err == KEY_DEAD) { - /* Key being removed or needs to be removed */ - key_invalidate(key); - key_put(key); - goto retry; + /* + * We found a key struct, but it's already been fully + * removed. Ignore the old struct and add a new one. + * fscrypt_add_key_mutex means we don't need to worry + * about concurrent adds. 
+ */ + err = add_new_master_key(sb, secret, mk_spec); } - key_put(key); + fscrypt_put_master_key(mk); } -out_unlock: mutex_unlock(&fscrypt_add_key_mutex); return err; } @@ -731,19 +781,19 @@ int fscrypt_verify_key_added(struct super_block *sb, const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) { struct fscrypt_key_specifier mk_spec; - struct key *key, *mk_user; struct fscrypt_master_key *mk; + struct key *mk_user; int err; mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER; memcpy(mk_spec.u.identifier, identifier, FSCRYPT_KEY_IDENTIFIER_SIZE); - key = fscrypt_find_master_key(sb, &mk_spec); - if (IS_ERR(key)) { - err = PTR_ERR(key); + mk = fscrypt_find_master_key(sb, &mk_spec); + if (!mk) { + err = -ENOKEY; goto out; } - mk = key->payload.data[0]; + down_read(&mk->mk_sem); mk_user = find_master_key_user(mk); if (IS_ERR(mk_user)) { err = PTR_ERR(mk_user); @@ -751,7 +801,8 @@ int fscrypt_verify_key_added(struct super_block *sb, key_put(mk_user); err = 0; } - key_put(key); + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); out: if (err == -ENOKEY && capable(CAP_FOWNER)) err = 0; @@ -913,11 +964,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) struct super_block *sb = file_inode(filp)->i_sb; struct fscrypt_remove_key_arg __user *uarg = _uarg; struct fscrypt_remove_key_arg arg; - struct key *key; struct fscrypt_master_key *mk; u32 status_flags = 0; int err; - bool dead; + bool inodes_remain; if (copy_from_user(&arg, uarg, sizeof(arg))) return -EFAULT; @@ -937,12 +987,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) return -EACCES; /* Find the key being removed. 
*/ - key = fscrypt_find_master_key(sb, &arg.key_spec); - if (IS_ERR(key)) - return PTR_ERR(key); - mk = key->payload.data[0]; - - down_write(&key->sem); + mk = fscrypt_find_master_key(sb, &arg.key_spec); + if (!mk) + return -ENOKEY; + down_write(&mk->mk_sem); /* If relevant, remove current user's (or all users) claim to the key */ if (mk->mk_users && mk->mk_users->keys.nr_leaves_on_tree != 0) { @@ -951,7 +999,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) else err = remove_master_key_user(mk); if (err) { - up_write(&key->sem); + up_write(&mk->mk_sem); goto out_put_key; } if (mk->mk_users->keys.nr_leaves_on_tree != 0) { @@ -963,26 +1011,22 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) status_flags |= FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS; err = 0; - up_write(&key->sem); + up_write(&mk->mk_sem); goto out_put_key; } } /* No user claims remaining. Go ahead and wipe the secret. */ - dead = false; + err = -ENOKEY; if (is_master_key_secret_present(&mk->mk_secret)) { wipe_master_key_secret(&mk->mk_secret); - dead = refcount_dec_and_test(&mk->mk_refcount); - } - up_write(&key->sem); - if (dead) { - /* - * No inodes reference the key, and we wiped the secret, so the - * key object is free to be removed from the keyring. - */ - key_invalidate(key); + fscrypt_put_master_key_activeref(mk); err = 0; - } else { + } + inodes_remain = refcount_read(&mk->mk_active_refs) > 0; + up_write(&mk->mk_sem); + + if (inodes_remain) { /* Some inodes still reference this key; try to evict them. */ err = try_to_lock_encrypted_files(sb, mk); if (err == -EBUSY) { @@ -998,7 +1042,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) * has been fully removed including all files locked. 
*/ out_put_key: - key_put(key); + fscrypt_put_master_key(mk); if (err == 0) err = put_user(status_flags, &uarg->removal_status_flags); return err; @@ -1045,7 +1089,6 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) { struct super_block *sb = file_inode(filp)->i_sb; struct fscrypt_get_key_status_arg arg; - struct key *key; struct fscrypt_master_key *mk; int err; @@ -1062,19 +1105,18 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) arg.user_count = 0; memset(arg.__out_reserved, 0, sizeof(arg.__out_reserved)); - key = fscrypt_find_master_key(sb, &arg.key_spec); - if (IS_ERR(key)) { - if (key != ERR_PTR(-ENOKEY)) - return PTR_ERR(key); + mk = fscrypt_find_master_key(sb, &arg.key_spec); + if (!mk) { arg.status = FSCRYPT_KEY_STATUS_ABSENT; err = 0; goto out; } - mk = key->payload.data[0]; - down_read(&key->sem); + down_read(&mk->mk_sem); if (!is_master_key_secret_present(&mk->mk_secret)) { - arg.status = FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED; + arg.status = refcount_read(&mk->mk_active_refs) > 0 ? 
+ FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED : + FSCRYPT_KEY_STATUS_ABSENT /* raced with full removal */; err = 0; goto out_release_key; } @@ -1096,8 +1138,8 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) } err = 0; out_release_key: - up_read(&key->sem); - key_put(key); + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); out: if (!err && copy_to_user(uarg, &arg, sizeof(arg))) err = -EFAULT; @@ -1109,13 +1151,9 @@ int __init fscrypt_init_keyring(void) { int err; - err = register_key_type(&key_type_fscrypt); - if (err) - return err; - err = register_key_type(&key_type_fscrypt_user); if (err) - goto err_unregister_fscrypt; + return err; err = register_key_type(&key_type_fscrypt_provisioning); if (err) @@ -1125,7 +1163,5 @@ int __init fscrypt_init_keyring(void) err_unregister_fscrypt_user: unregister_key_type(&key_type_fscrypt_user); -err_unregister_fscrypt: - unregister_key_type(&key_type_fscrypt); return err; } diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 72aec33e0ea5..7b14054fab49 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -9,7 +9,6 @@ */ #include -#include #include #include "fscrypt_private.h" @@ -151,6 +150,7 @@ void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key) { crypto_free_skcipher(prep_key->tfm); fscrypt_destroy_inline_crypt_key(prep_key); + memzero_explicit(prep_key, sizeof(*prep_key)); } /* Given a per-file encryption key, set up the file's crypto transform object */ @@ -404,20 +404,18 @@ static bool fscrypt_valid_master_key_size(const struct fscrypt_master_key *mk, /* * Find the master key, then set up the inode's actual encryption key. * - * If the master key is found in the filesystem-level keyring, then the - * corresponding 'struct key' is returned in *master_key_ret with its semaphore - * read-locked. 
This is needed to ensure that only one task links the - * fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race to create - * an fscrypt_info for the same inode), and to synchronize the master key being - * removed with a new inode starting to use it. + * If the master key is found in the filesystem-level keyring, then it is + * returned in *mk_ret with its semaphore read-locked. This is needed to ensure + * that only one task links the fscrypt_info into ->mk_decrypted_inodes (as + * multiple tasks may race to create an fscrypt_info for the same inode), and to + * synchronize the master key being removed with a new inode starting to use it. */ static int setup_file_encryption_key(struct fscrypt_info *ci, bool need_dirhash_key, - struct key **master_key_ret) + struct fscrypt_master_key **mk_ret) { - struct key *key; - struct fscrypt_master_key *mk = NULL; struct fscrypt_key_specifier mk_spec; + struct fscrypt_master_key *mk; int err; err = fscrypt_select_encryption_impl(ci); @@ -442,11 +440,10 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, return -EINVAL; } - key = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec); - if (IS_ERR(key)) { - if (key != ERR_PTR(-ENOKEY) || - ci->ci_policy.version != FSCRYPT_POLICY_V1) - return PTR_ERR(key); + mk = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec); + if (!mk) { + if (ci->ci_policy.version != FSCRYPT_POLICY_V1) + return -ENOKEY; /* * As a legacy fallback for v1 policies, search for the key in @@ -456,9 +453,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, */ return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci); } - - mk = key->payload.data[0]; - down_read(&key->sem); + down_read(&mk->mk_sem); /* Has the secret been removed (via FS_IOC_REMOVE_ENCRYPTION_KEY)? 
*/ if (!is_master_key_secret_present(&mk->mk_secret)) { @@ -486,18 +481,18 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, if (err) goto out_release_key; - *master_key_ret = key; + *mk_ret = mk; return 0; out_release_key: - up_read(&key->sem); - key_put(key); + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); return err; } static void put_crypt_info(struct fscrypt_info *ci) { - struct key *key; + struct fscrypt_master_key *mk; if (!ci) return; @@ -507,24 +502,18 @@ static void put_crypt_info(struct fscrypt_info *ci) else if (ci->ci_owns_key) fscrypt_destroy_prepared_key(&ci->ci_enc_key); - key = ci->ci_master_key; - if (key) { - struct fscrypt_master_key *mk = key->payload.data[0]; - + mk = ci->ci_master_key; + if (mk) { /* * Remove this inode from the list of inodes that were unlocked - * with the master key. - * - * In addition, if we're removing the last inode from a key that - * already had its secret removed, invalidate the key so that it - * gets removed from ->s_master_keys. + * with the master key. In addition, if we're removing the last + * inode from a master key struct that already had its secret + * removed, then complete the full removal of the struct. 
*/ spin_lock(&mk->mk_decrypted_inodes_lock); list_del(&ci->ci_master_key_link); spin_unlock(&mk->mk_decrypted_inodes_lock); - if (refcount_dec_and_test(&mk->mk_refcount)) - key_invalidate(key); - key_put(key); + fscrypt_put_master_key_activeref(mk); } memzero_explicit(ci, sizeof(*ci)); kmem_cache_free(fscrypt_info_cachep, ci); @@ -538,7 +527,7 @@ fscrypt_setup_encryption_info(struct inode *inode, { struct fscrypt_info *crypt_info; struct fscrypt_mode *mode; - struct key *master_key = NULL; + struct fscrypt_master_key *mk = NULL; int res; res = fscrypt_initialize(inode->i_sb->s_cop->flags); @@ -561,8 +550,7 @@ fscrypt_setup_encryption_info(struct inode *inode, WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE); crypt_info->ci_mode = mode; - res = setup_file_encryption_key(crypt_info, need_dirhash_key, - &master_key); + res = setup_file_encryption_key(crypt_info, need_dirhash_key, &mk); if (res) goto out; @@ -577,12 +565,9 @@ fscrypt_setup_encryption_info(struct inode *inode, * We won the race and set ->i_crypt_info to our crypt_info. * Now link it into the master key's inode list. 
*/ - if (master_key) { - struct fscrypt_master_key *mk = - master_key->payload.data[0]; - - refcount_inc(&mk->mk_refcount); - crypt_info->ci_master_key = key_get(master_key); + if (mk) { + crypt_info->ci_master_key = mk; + refcount_inc(&mk->mk_active_refs); spin_lock(&mk->mk_decrypted_inodes_lock); list_add(&crypt_info->ci_master_key_link, &mk->mk_decrypted_inodes); @@ -592,9 +577,9 @@ fscrypt_setup_encryption_info(struct inode *inode, } res = 0; out: - if (master_key) { - up_read(&master_key->sem); - key_put(master_key); + if (mk) { + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); } put_crypt_info(crypt_info); return res; @@ -745,7 +730,6 @@ EXPORT_SYMBOL(fscrypt_free_inode); int fscrypt_drop_inode(struct inode *inode) { const struct fscrypt_info *ci = fscrypt_get_info(inode); - const struct fscrypt_master_key *mk; /* * If ci is NULL, then the inode doesn't have an encryption key set up @@ -755,7 +739,6 @@ int fscrypt_drop_inode(struct inode *inode) */ if (!ci || !ci->ci_master_key) return 0; - mk = ci->ci_master_key->payload.data[0]; /* * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes @@ -774,6 +757,6 @@ int fscrypt_drop_inode(struct inode *inode) * then the thread removing the key will either evict the inode itself * or will correctly detect that it wasn't evicted due to the race. */ - return !is_master_key_secret_present(&mk->mk_secret); + return !is_master_key_secret_present(&ci->ci_master_key->mk_secret); } EXPORT_SYMBOL_GPL(fscrypt_drop_inode); diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index faa0f21daa68..f68265c36377 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -686,12 +686,8 @@ int fscrypt_set_context(struct inode *inode, void *fs_data) * delayed key setup that requires the inode number. 
*/ if (ci->ci_policy.version == FSCRYPT_POLICY_V2 && - (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) { - const struct fscrypt_master_key *mk = - ci->ci_master_key->payload.data[0]; - - fscrypt_hash_inode_number(ci, mk); - } + (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) + fscrypt_hash_inode_number(ci, ci->ci_master_key); return inode->i_sb->s_cop->set_context(inode, &ctx, ctxsize, fs_data); } diff --git a/fs/super.c b/fs/super.c index bae3fe80f852..beee3e1b4c29 100644 --- a/fs/super.c +++ b/fs/super.c @@ -293,7 +293,6 @@ static void __put_super(struct super_block *s) WARN_ON(s->s_inode_lru.node); WARN_ON(!list_empty(&s->s_mounts)); security_sb_free(s); - fscrypt_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); call_rcu(&s->rcu, destroy_super_rcu); @@ -454,6 +453,7 @@ void generic_shutdown_super(struct super_block *sb) evict_inodes(sb); /* only nonzero refcount inodes can have marks */ fsnotify_sb_delete(sb); + fscrypt_sb_delete(sb); if (sb->s_dio_done_wq) { destroy_workqueue(sb->s_dio_done_wq); diff --git a/include/linux/fs.h b/include/linux/fs.h index c8f887641878..df54acdd3554 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1437,7 +1437,7 @@ struct super_block { const struct xattr_handler **s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; - struct key *s_master_keys; /* master crypto keys in use */ + struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index d0a1b8edfd9d..23d3ea47f764 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -193,7 +193,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -void fscrypt_sb_free(struct super_block *sb); +void fscrypt_sb_delete(struct super_block *sb); int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); int 
fscrypt_ioctl_remove_key(struct file *filp, void __user *arg); int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg); @@ -380,7 +380,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -static inline void fscrypt_sb_free(struct super_block *sb) +static inline void fscrypt_sb_delete(struct super_block *sb) { } From 29997a6fa60de1de2fa0de471e7652efa6e95868 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 4 Nov 2022 16:38:00 -0700 Subject: [PATCH 0642/1477] fscrypt: fix keyring memory leak on mount failure commit ccd30a476f8e864732de220bd50e6f372f5ebcab upstream. Commit d7e7b9af104c ("fscrypt: stop using keyrings subsystem for fscrypt_master_key") moved the keyring destruction from __put_super() to generic_shutdown_super() so that the filesystem's block device(s) are still available. Unfortunately, this causes a memory leak in the case where a mount is attempted with the test_dummy_encryption mount option, but the mount fails after the option has already been processed. To fix this, attempt the keyring destruction in both places. Reported-by: syzbot+104c2a89561289cec13e@syzkaller.appspotmail.com Fixes: d7e7b9af104c ("fscrypt: stop using keyrings subsystem for fscrypt_master_key") Signed-off-by: Eric Biggers Reviewed-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20221011213838.209879-1-ebiggers@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/crypto/keyring.c | 17 +++++++++++------ fs/super.c | 3 ++- include/linux/fscrypt.h | 4 ++-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 175b071beaf8..02f8bf8bd54d 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -202,14 +202,19 @@ static int allocate_filesystem_keyring(struct super_block *sb) } /* - * This is called at unmount time to release all encryption keys that have been - * added to the filesystem, along with the keyring that contains them. 
+ * Release all encryption keys that have been added to the filesystem, along + * with the keyring that contains them. * - * Note that besides clearing and freeing memory, this might need to evict keys - * from the keyslots of an inline crypto engine. Therefore, this must be called - * while the filesystem's underlying block device(s) are still available. + * This is called at unmount time. The filesystem's underlying block device(s) + * are still available at this time; this is important because after user file + * accesses have been allowed, this function may need to evict keys from the + * keyslots of an inline crypto engine, which requires the block device(s). + * + * This is also called when the super_block is being freed. This is needed to + * avoid a memory leak if mounting fails after the "test_dummy_encryption" + * option was processed, as in that case the unmount-time call isn't made. */ -void fscrypt_sb_delete(struct super_block *sb) +void fscrypt_destroy_keyring(struct super_block *sb) { struct fscrypt_keyring *keyring = sb->s_master_keys; size_t i; diff --git a/fs/super.c b/fs/super.c index beee3e1b4c29..7629f9dd031c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -293,6 +293,7 @@ static void __put_super(struct super_block *s) WARN_ON(s->s_inode_lru.node); WARN_ON(!list_empty(&s->s_mounts)); security_sb_free(s); + fscrypt_destroy_keyring(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); call_rcu(&s->rcu, destroy_super_rcu); @@ -453,7 +454,7 @@ void generic_shutdown_super(struct super_block *sb) evict_inodes(sb); /* only nonzero refcount inodes can have marks */ fsnotify_sb_delete(sb); - fscrypt_sb_delete(sb); + fscrypt_destroy_keyring(sb); if (sb->s_dio_done_wq) { destroy_workqueue(sb->s_dio_done_wq); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 23d3ea47f764..d0bc66fae7e0 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -193,7 +193,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* 
keyring.c */ -void fscrypt_sb_delete(struct super_block *sb); +void fscrypt_destroy_keyring(struct super_block *sb); int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg); int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg); @@ -380,7 +380,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -static inline void fscrypt_sb_delete(struct super_block *sb) +static inline void fscrypt_destroy_keyring(struct super_block *sb) { } From 818c36b988b82f31e4be8ad8415e1be902b8e5f8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Oct 2022 11:53:45 -0700 Subject: [PATCH 0643/1477] tcp/udp: Fix memory leak in ipv6_renew_options(). commit 3c52c6bb831f6335c176a0fc7214e26f43adbd11 upstream. syzbot reported a memory leak [0] related to IPV6_ADDRFORM. The scenario is that while one thread is converting an IPv6 socket into IPv4 with IPV6_ADDRFORM, another thread calls do_ipv6_setsockopt() and allocates memory to inet6_sk(sk)->XXX after conversion. Then, the converted sk with (tcp|udp)_prot never frees the IPv6 resources, which inet6_destroy_sock() should have cleaned up. setsockopt(IPV6_ADDRFORM) setsockopt(IPV6_DSTOPTS) +-----------------------+ +----------------------+ - do_ipv6_setsockopt(sk, ...) - sockopt_lock_sock(sk) - do_ipv6_setsockopt(sk, ...) - lock_sock(sk) ^._ called via tcpv6_prot - WRITE_ONCE(sk->sk_prot, &tcp_prot) before WRITE_ONCE() - xchg(&np->opt, NULL) - txopt_put(opt) - sockopt_release_sock(sk) - release_sock(sk) - sockopt_lock_sock(sk) - lock_sock(sk) - ipv6_set_opt_hdr(sk, ...) - ipv6_update_options(sk, opt) - xchg(&inet6_sk(sk)->opt, opt) ^._ opt is never freed. - sockopt_release_sock(sk) - release_sock(sk) Since IPV6_DSTOPTS allocates options under lock_sock(), we can avoid this memory leak by testing whether sk_family is changed by IPV6_ADDRFORM after acquiring the lock. 
This issue exists from the initial commit between IPV6_ADDRFORM and IPV6_PKTOPTIONS. [0]: BUG: memory leak unreferenced object 0xffff888009ab9f80 (size 96): comm "syz-executor583", pid 328, jiffies 4294916198 (age 13.034s) hex dump (first 32 bytes): 01 00 00 00 48 00 00 00 08 00 00 00 00 00 00 00 ....H........... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000002ee98ae1>] kmalloc include/linux/slab.h:605 [inline] [<000000002ee98ae1>] sock_kmalloc+0xb3/0x100 net/core/sock.c:2566 [<0000000065d7b698>] ipv6_renew_options+0x21e/0x10b0 net/ipv6/exthdrs.c:1318 [<00000000a8c756d7>] ipv6_set_opt_hdr net/ipv6/ipv6_sockglue.c:354 [inline] [<00000000a8c756d7>] do_ipv6_setsockopt.constprop.0+0x28b7/0x4350 net/ipv6/ipv6_sockglue.c:668 [<000000002854d204>] ipv6_setsockopt+0xdf/0x190 net/ipv6/ipv6_sockglue.c:1021 [<00000000e69fdcf8>] tcp_setsockopt+0x13b/0x2620 net/ipv4/tcp.c:3789 [<0000000090da4b9b>] __sys_setsockopt+0x239/0x620 net/socket.c:2252 [<00000000b10d192f>] __do_sys_setsockopt net/socket.c:2263 [inline] [<00000000b10d192f>] __se_sys_setsockopt net/socket.c:2260 [inline] [<00000000b10d192f>] __x64_sys_setsockopt+0xbe/0x160 net/socket.c:2260 [<000000000a80d7aa>] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [<000000000a80d7aa>] do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 [<000000004562b5c6>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski Signed-off-by: Meena Shanmugam Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ipv6_sockglue.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 6fa118bf40cd..2017257cb278 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -417,6 +417,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, rtnl_lock(); lock_sock(sk); + /* Another thread has converted the socket into IPv4 
with + * IPV6_ADDRFORM concurrently. + */ + if (unlikely(sk->sk_family != AF_INET6)) + goto unlock; + switch (optname) { case IPV6_ADDRFORM: @@ -976,6 +982,7 @@ done: break; } +unlock: release_sock(sk); if (needs_rtnl) rtnl_unlock(); From e4e4b24b42e710db058cc2a79a7cf16bf02b4915 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 1 Jul 2022 13:03:41 +0200 Subject: [PATCH 0644/1477] mtd: rawnand: gpmi: Set WAIT_FOR_READY timeout based on program/erase times MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0fddf9ad06fd9f439f137139861556671673e31c upstream. Commit 06781a5026350 fixes the calculation of the DEVICE_BUSY_TIMEOUT register value from busy_timeout_cycles. busy_timeout_cycles is itself calculated incorrectly, though: it is calculated based on the maximum page read time, but the timeout is also used for page write and block erase operations, which require timeouts that are orders of magnitude larger. Fix this by calculating busy_timeout_cycles from the maximum of tBERS_max and tPROG_max. This is for now the easiest and most obvious way to fix the driver. There's room for improvement, though: the NAND_OP_WAITRDY_INSTR tells us the desired timeout for the current operation, so we could program the timeout dynamically for each operation instead of setting a fixed timeout. Also, we could wire up the interrupt handler to actually detect and forward timeouts that occur while waiting for the chip to become ready. As a side note, I verified that the change in 06781a5026350 is really correct. I wired up the interrupt handler in my tree and measured the time between starting the operation and the timeout interrupt handler coming in. The time increases by 41us with each step in the timeout register, which corresponds to 4096 clock cycles with the 99MHz clock that I have.
Fixes: 06781a5026350 ("mtd: rawnand: gpmi: Fix setting busy timeout setting") Fixes: b1206122069aa ("mtd: rawnand: gpmi: use core timings instead of an empirical derivation") Cc: stable@vger.kernel.org Signed-off-by: Sascha Hauer Acked-by: Han Xu Tested-by: Tomasz Moń Signed-off-by: Richard Weinberger Signed-off-by: Tim Harvey Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 92e8ca56f566..200d3ab343b0 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -653,8 +653,9 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, unsigned int tRP_ps; bool use_half_period; int sample_delay_ps, sample_delay_factor; - u16 busy_timeout_cycles; + unsigned int busy_timeout_cycles; u8 wrn_dly_sel; + u64 busy_timeout_ps; if (sdr->tRC_min >= 30000) { /* ONFI non-EDO modes [0-3] */ @@ -678,7 +679,8 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, addr_setup_cycles = TO_CYCLES(sdr->tALS_min, period_ps); data_setup_cycles = TO_CYCLES(sdr->tDS_min, period_ps); data_hold_cycles = TO_CYCLES(sdr->tDH_min, period_ps); - busy_timeout_cycles = TO_CYCLES(sdr->tWB_max + sdr->tR_max, period_ps); + busy_timeout_ps = max(sdr->tBERS_max, sdr->tPROG_max); + busy_timeout_cycles = TO_CYCLES(busy_timeout_ps, period_ps); hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) | BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) | From 836686e1a01d7e2fda6a5a18252243ff30a6e196 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 2 Sep 2021 14:55:31 -0700 Subject: [PATCH 0645/1477] memcg: enable accounting of ipc resources commit 18319498fdd4cdf8c1c2c48cd432863b1f915d6f upstream. When a user creates IPC objects, it forces the kernel to allocate memory for these long-living objects.
It makes sense to account them to restrict the host's memory consumption from inside the memcg-limited container. This patch enables accounting for IPC shared memory segments, messages, semaphores, and semaphore undo lists. Link: https://lkml.kernel.org/r/d6507b06-4df6-78f8-6c54-3ae86e3b5339@virtuozzo.com Signed-off-by: Vasily Averin Reviewed-by: Shakeel Butt Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christian Brauner Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Jens Axboe Cc: Jiri Slaby Cc: Johannes Weiner Cc: Kirill Tkhai Cc: Michal Hocko Cc: Oleg Nesterov Cc: Roman Gushchin Cc: Serge Hallyn Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: Yutian Yang Cc: Zefan Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Luiz Capitulino Signed-off-by: Greg Kroah-Hartman --- ipc/msg.c | 2 +- ipc/sem.c | 9 +++++---- ipc/shm.c | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index 6e6c8e0c9380..8ded6b8f10a2 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -147,7 +147,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) key_t key = params->key; int msgflg = params->flg; - msq = kvmalloc(sizeof(*msq), GFP_KERNEL); + msq = kvmalloc(sizeof(*msq), GFP_KERNEL_ACCOUNT); if (unlikely(!msq)) return -ENOMEM; diff --git a/ipc/sem.c b/ipc/sem.c index 7d9c06b0ad6e..d3b9b73cd9ca 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -511,7 +511,7 @@ static struct sem_array *sem_alloc(size_t nsems) if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0])) return NULL; - sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL); + sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL_ACCOUNT); if (unlikely(!sma)) return NULL; @@ -1852,7 +1852,7 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp) undo_list =
current->sysvsem.undo_list; if (!undo_list) { - undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); + undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL_ACCOUNT); if (undo_list == NULL) return -ENOMEM; spin_lock_init(&undo_list->lock); @@ -1937,7 +1937,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) rcu_read_unlock(); /* step 2: allocate new undo structure */ - new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); + new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL_ACCOUNT); if (!new) { ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); return ERR_PTR(-ENOMEM); @@ -2001,7 +2001,8 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, if (nsops > ns->sc_semopm) return -E2BIG; if (nsops > SEMOPM_FAST) { - sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); + sops = kvmalloc_array(nsops, sizeof(*sops), + GFP_KERNEL_ACCOUNT); if (sops == NULL) return -ENOMEM; } diff --git a/ipc/shm.c b/ipc/shm.c index 471ac3e7498d..b418731d66e8 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -711,7 +711,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) ns->shm_tot + numpages > ns->shm_ctlall) return -ENOSPC; - shp = kvmalloc(sizeof(*shp), GFP_KERNEL); + shp = kvmalloc(sizeof(*shp), GFP_KERNEL_ACCOUNT); if (unlikely(!shp)) return -ENOMEM; From 015ac18be7de25d17d6e5f1643cb3b60bfbe859e Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Fri, 4 Nov 2022 17:54:49 +0000 Subject: [PATCH 0646/1477] binder: fix UAF of alloc->vma in race with munmap() In commit 720c24192404 ("ANDROID: binder: change down_write to down_read") binder assumed the mmap read lock is sufficient to protect alloc->vma inside binder_update_page_range(). This used to be accurate until commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"), which now downgrades the mmap_lock after detaching the vma from the rbtree in munmap(). 
Then it proceeds to teardown and free the vma with only the read lock held. This means that accesses to alloc->vma in binder_update_page_range() now will race with vm_area_free() in munmap() and can cause a UAF as shown in the following KASAN trace: ================================================================== BUG: KASAN: use-after-free in vm_insert_page+0x7c/0x1f0 Read of size 8 at addr ffff16204ad00600 by task server/558 CPU: 3 PID: 558 Comm: server Not tainted 5.10.150-00001-gdc8dcf942daa #1 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x2a0 show_stack+0x18/0x2c dump_stack+0xf8/0x164 print_address_description.constprop.0+0x9c/0x538 kasan_report+0x120/0x200 __asan_load8+0xa0/0xc4 vm_insert_page+0x7c/0x1f0 binder_update_page_range+0x278/0x50c binder_alloc_new_buf+0x3f0/0xba0 binder_transaction+0x64c/0x3040 binder_thread_write+0x924/0x2020 binder_ioctl+0x1610/0x2e5c __arm64_sys_ioctl+0xd4/0x120 el0_svc_common.constprop.0+0xac/0x270 do_el0_svc+0x38/0xa0 el0_svc+0x1c/0x2c el0_sync_handler+0xe8/0x114 el0_sync+0x180/0x1c0 Allocated by task 559: kasan_save_stack+0x38/0x6c __kasan_kmalloc.constprop.0+0xe4/0xf0 kasan_slab_alloc+0x18/0x2c kmem_cache_alloc+0x1b0/0x2d0 vm_area_alloc+0x28/0x94 mmap_region+0x378/0x920 do_mmap+0x3f0/0x600 vm_mmap_pgoff+0x150/0x17c ksys_mmap_pgoff+0x284/0x2dc __arm64_sys_mmap+0x84/0xa4 el0_svc_common.constprop.0+0xac/0x270 do_el0_svc+0x38/0xa0 el0_svc+0x1c/0x2c el0_sync_handler+0xe8/0x114 el0_sync+0x180/0x1c0 Freed by task 560: kasan_save_stack+0x38/0x6c kasan_set_track+0x28/0x40 kasan_set_free_info+0x24/0x4c __kasan_slab_free+0x100/0x164 kasan_slab_free+0x14/0x20 kmem_cache_free+0xc4/0x34c vm_area_free+0x1c/0x2c remove_vma+0x7c/0x94 __do_munmap+0x358/0x710 __vm_munmap+0xbc/0x130 __arm64_sys_munmap+0x4c/0x64 el0_svc_common.constprop.0+0xac/0x270 do_el0_svc+0x38/0xa0 el0_svc+0x1c/0x2c el0_sync_handler+0xe8/0x114 el0_sync+0x180/0x1c0 [...] 
================================================================== To prevent the race above, revert to taking the mmap write lock inside binder_update_page_range(). One might expect an increase of mmap lock contention. However, binder already serializes these calls via the top-level alloc->mutex. Also, there was no performance impact shown when running the binder benchmark tests. Note this patch is specific to stable branches 5.4 and 5.10, since in newer kernel releases binder no longer caches a pointer to the vma. Instead, it has been refactored to use vma_lookup(), which avoids the issue described here. This switch was introduced in commit a43cfc87caaf ("android: binder: stop saving a pointer to the VMA"). Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap") Reported-by: Jann Horn Cc: stable@vger.kernel.org # 5.10.x Cc: Minchan Kim Cc: Yang Shi Cc: Liam Howlett Signed-off-by: Carlos Llamas Acked-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 95ca4f934d28..a77ed66425f2 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -212,7 +212,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, mm = alloc->vma_vm_mm; if (mm) { - mmap_read_lock(mm); + mmap_write_lock(mm); vma = alloc->vma; } @@ -270,7 +270,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, trace_binder_alloc_page_end(alloc, index); } if (mm) { - mmap_read_unlock(mm); + mmap_write_unlock(mm); mmput(mm); } return 0; @@ -303,7 +303,7 @@ err_page_ptr_cleared: } err_no_vma: if (mm) { - mmap_read_unlock(mm); + mmap_write_unlock(mm); mmput(mm); } return vma ?
-ENOMEM : -ESRCH; From e33ce54cef5d429430e3b1ae5c8ee4f4103c4fdc Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 25 Oct 2022 14:10:32 +0100 Subject: [PATCH 0647/1477] coresight: cti: Fix hang in cti_disable_hw() commit 6746eae4bbaddcc16b40efb33dab79210828b3ce upstream. cti_enable_hw() and cti_disable_hw() are called from an atomic context so shouldn't use runtime PM because it can result in a sleep when communicating with firmware. Since commit 3c6656337852 ("Revert "firmware: arm_scmi: Add clock management to the SCMI power domain""), this causes a hang on Juno when running the Perf Coresight tests or running this command: perf record -e cs_etm//u -- ls This was also missed until the revert commit because pm_runtime_put() was called with the wrong device until commit 692c9a499b28 ("coresight: cti: Correct the parameter for pm_runtime_put") With lock and scheduler debugging enabled the following is output: coresight cti_sys0: cti_enable_hw -- dev:cti_sys0 parent: 20020000.cti BUG: sleeping function called from invalid context at drivers/base/power/runtime.c:1151 in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 330, name: perf-exec preempt_count: 2, expected: 0 RCU nest depth: 0, expected: 0 INFO: lockdep is turned off. 
irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0xa0c/0x1948 softirqs last enabled at (0): [] copy_process+0xa0c/0x1948 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 3 PID: 330 Comm: perf-exec Not tainted 6.0.0-00053-g042116d99298 #7 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Sep 13 2022 Call trace: dump_backtrace+0x134/0x140 show_stack+0x20/0x58 dump_stack_lvl+0x8c/0xb8 dump_stack+0x18/0x34 __might_resched+0x180/0x228 __might_sleep+0x50/0x88 __pm_runtime_resume+0xac/0xb0 cti_enable+0x44/0x120 coresight_control_assoc_ectdev+0xc0/0x150 coresight_enable_path+0xb4/0x288 etm_event_start+0x138/0x170 etm_event_add+0x48/0x70 event_sched_in.isra.122+0xb4/0x280 merge_sched_in+0x1fc/0x3d0 visit_groups_merge.constprop.137+0x16c/0x4b0 ctx_sched_in+0x114/0x1f0 perf_event_sched_in+0x60/0x90 ctx_resched+0x68/0xb0 perf_event_exec+0x138/0x508 begin_new_exec+0x52c/0xd40 load_elf_binary+0x6b8/0x17d0 bprm_execve+0x360/0x7f8 do_execveat_common.isra.47+0x218/0x238 __arm64_sys_execve+0x48/0x60 invoke_syscall+0x4c/0x110 el0_svc_common.constprop.4+0xfc/0x120 do_el0_svc+0x34/0xc0 el0_svc+0x40/0x98 el0t_64_sync_handler+0x98/0xc0 el0t_64_sync+0x170/0x174 Fix the issue by removing the runtime PM calls completely. They are not needed here because it must have already been done when building the path for a trace. 
Fixes: 835d722ba10a ("coresight: cti: Initial CoreSight CTI Driver") Cc: stable Reported-by: Aishwarya TCV Reported-by: Cristian Marussi Suggested-by: Suzuki K Poulose Signed-off-by: James Clark Reviewed-by: Mike Leach Tested-by: Mike Leach [ Fix build warnings ] Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20221025131032.1149459-1-suzuki.poulose@arm.com Signed-off-by: James Clark Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-cti-core.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c index 0276700c246d..90270696206c 100644 --- a/drivers/hwtracing/coresight/coresight-cti-core.c +++ b/drivers/hwtracing/coresight/coresight-cti-core.c @@ -90,11 +90,9 @@ void cti_write_all_hw_regs(struct cti_drvdata *drvdata) static int cti_enable_hw(struct cti_drvdata *drvdata) { struct cti_config *config = &drvdata->config; - struct device *dev = &drvdata->csdev->dev; unsigned long flags; int rc = 0; - pm_runtime_get_sync(dev->parent); spin_lock_irqsave(&drvdata->spinlock, flags); /* no need to do anything if enabled or unpowered*/ @@ -119,7 +117,6 @@ cti_state_unchanged: /* cannot enable due to error */ cti_err_not_enabled: spin_unlock_irqrestore(&drvdata->spinlock, flags); - pm_runtime_put(dev->parent); return rc; } @@ -153,7 +150,6 @@ cti_hp_not_enabled: static int cti_disable_hw(struct cti_drvdata *drvdata) { struct cti_config *config = &drvdata->config; - struct device *dev = &drvdata->csdev->dev; spin_lock(&drvdata->spinlock); @@ -174,7 +170,6 @@ static int cti_disable_hw(struct cti_drvdata *drvdata) coresight_disclaim_device_unlocked(drvdata->base); CS_LOCK(drvdata->base); spin_unlock(&drvdata->spinlock); - pm_runtime_put(dev->parent); return 0; /* not disabled this call */ From 06de93a47cec1920a7c8d99577c30f80b97b5f0c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 18 Oct 2022 16:05:52 +0200 Subject: [PATCH 0648/1477] 
btrfs: fix type of parameter generation in btrfs_get_dentry commit 2398091f9c2c8e0040f4f9928666787a3e8108a7 upstream. The type of parameter generation has been u32 since the beginning, however all callers pass a u64 generation, so unify the types to prevent potential loss. CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/export.c | 2 +- fs/btrfs/export.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 1a8d419d9e1f..bfa2bf44529c 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -58,7 +58,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, } struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, - u64 root_objectid, u32 generation, + u64 root_objectid, u64 generation, int check_generation) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h index f32f4113c976..5afb7ca42828 100644 --- a/fs/btrfs/export.h +++ b/fs/btrfs/export.h @@ -19,7 +19,7 @@ struct btrfs_fid { } __attribute__ ((packed)); struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, - u64 root_objectid, u32 generation, + u64 root_objectid, u64 generation, int check_generation); struct dentry *btrfs_get_parent(struct dentry *child); From ea5f2fd4640ecbb9df969bf8bb27733ae2183169 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Thu, 3 Nov 2022 11:10:10 +0800 Subject: [PATCH 0649/1477] ftrace: Fix use-after-free for dynamic ftrace_ops commit 0e792b89e6800cd9cb4757a76a96f7ef3e8b6294 upstream. KASAN reported a use-after-free with ftrace ops [1]. It was found from vmcore that perf had registered two ops with the same content successively, both dynamic. After unregistering the second ops, a use-after-free occurred. 
In ftrace_shutdown(), when the second ops is unregistered, the FTRACE_UPDATE_CALLS command is not set because there is another enabled ops with the same content. Also, both ops are dynamic and the ftrace callback function is ftrace_ops_list_func, so the FTRACE_UPDATE_TRACE_FUNC command will not be set. Eventually the value of 'command' will be 0 and ftrace_shutdown() will skip the rcu synchronization. However, ftrace may be activated. When the ops is released, another CPU may be accessing the ops. Add the missing synchronization to fix this problem. [1] BUG: KASAN: use-after-free in __ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline] BUG: KASAN: use-after-free in ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049 Read of size 8 at addr ffff56551965bbc8 by task syz-executor.2/14468 CPU: 1 PID: 14468 Comm: syz-executor.2 Not tainted 5.10.0 #7 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x40c arch/arm64/kernel/stacktrace.c:132 show_stack+0x30/0x40 arch/arm64/kernel/stacktrace.c:196 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b4/0x248 lib/dump_stack.c:118 print_address_description.constprop.0+0x28/0x48c mm/kasan/report.c:387 __kasan_report mm/kasan/report.c:547 [inline] kasan_report+0x118/0x210 mm/kasan/report.c:564 check_memory_region_inline mm/kasan/generic.c:187 [inline] __asan_load8+0x98/0xc0 mm/kasan/generic.c:253 __ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline] ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049 ftrace_graph_call+0x0/0x4 __might_sleep+0x8/0x100 include/linux/perf_event.h:1170 __might_fault mm/memory.c:5183 [inline] __might_fault+0x58/0x70 mm/memory.c:5171 do_strncpy_from_user lib/strncpy_from_user.c:41 [inline] strncpy_from_user+0x1f4/0x4b0 lib/strncpy_from_user.c:139 getname_flags+0xb0/0x31c fs/namei.c:149 getname+0x2c/0x40 fs/namei.c:209 [...] 
Allocated by task 14445: kasan_save_stack+0x24/0x50 mm/kasan/common.c:48 kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc mm/kasan/common.c:479 [inline] __kasan_kmalloc.constprop.0+0x110/0x13c mm/kasan/common.c:449 kasan_kmalloc+0xc/0x14 mm/kasan/common.c:493 kmem_cache_alloc_trace+0x440/0x924 mm/slub.c:2950 kmalloc include/linux/slab.h:563 [inline] kzalloc include/linux/slab.h:675 [inline] perf_event_alloc.part.0+0xb4/0x1350 kernel/events/core.c:11230 perf_event_alloc kernel/events/core.c:11733 [inline] __do_sys_perf_event_open kernel/events/core.c:11831 [inline] __se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723 __arm64_sys_perf_event_open+0x6c/0x80 kernel/events/core.c:11723 [...] Freed by task 14445: kasan_save_stack+0x24/0x50 mm/kasan/common.c:48 kasan_set_track+0x24/0x34 mm/kasan/common.c:56 kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:358 __kasan_slab_free.part.0+0x11c/0x1b0 mm/kasan/common.c:437 __kasan_slab_free mm/kasan/common.c:445 [inline] kasan_slab_free+0x2c/0x40 mm/kasan/common.c:446 slab_free_hook mm/slub.c:1569 [inline] slab_free_freelist_hook mm/slub.c:1608 [inline] slab_free mm/slub.c:3179 [inline] kfree+0x12c/0xc10 mm/slub.c:4176 perf_event_alloc.part.0+0xa0c/0x1350 kernel/events/core.c:11434 perf_event_alloc kernel/events/core.c:11733 [inline] __do_sys_perf_event_open kernel/events/core.c:11831 [inline] __se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723 [...] 
Link: https://lore.kernel.org/linux-trace-kernel/20221103031010.166498-1-lihuafei1@huawei.com Fixes: edb096e00724f ("ftrace: Fix memleak when unregistering dynamic ops when tracing disabled") Cc: stable@vger.kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Huafei Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2165c9ac14bf..8e9ef0f55596 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2946,18 +2946,8 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) command |= FTRACE_UPDATE_TRACE_FUNC; } - if (!command || !ftrace_enabled) { - /* - * If these are dynamic or per_cpu ops, they still - * need their data freed. Since, function tracing is - * not currently active, we can just free them - * without synchronizing all CPUs. - */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - goto free_ops; - - return 0; - } + if (!command || !ftrace_enabled) + goto out; /* * If the ops uses a trampoline, then it needs to be @@ -2994,6 +2984,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) removed_ops = NULL; ops->flags &= ~FTRACE_OPS_FL_REMOVING; +out: /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. @@ -3021,7 +3012,6 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) if (IS_ENABLED(CONFIG_PREEMPTION)) synchronize_rcu_tasks(); - free_ops: ftrace_trampoline_free(ops); } From 2bf33b5ea46dbe547de44cdcbee6b1c0b6c167d4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 10:52:07 -0700 Subject: [PATCH 0650/1477] tcp/udp: Make early_demux back namespacified. commit 11052589cf5c0bab3b4884d423d5f60c38fcf25d upstream. Commit e21145a9871a ("ipv4: namespacify ip_early_demux sysctl knob") made it possible to enable/disable early_demux on a per-netns basis. 
Then, we introduced two knobs, tcp_early_demux and udp_early_demux, to switch it for TCP/UDP in commit dddb64bcb346 ("net: Add sysctl to toggle early demux for tcp and udp"). However, the .proc_handler() was wrong and actually disabled us from changing the behaviour in each netns. We can execute early_demux if net.ipv4.ip_early_demux is on and each proto .early_demux() handler is not NULL. When we toggle (tcp|udp)_early_demux, the change itself is saved in each netns variable, but the .early_demux() handler is a global variable, so the handler is switched based on the init_net's sysctl variable. Thus, netns (tcp|udp)_early_demux knobs have nothing to do with the logic. Whether we CAN execute proto .early_demux() is always decided by init_net's sysctl knob, and whether we DO it or not is by each netns ip_early_demux knob. This patch namespacifies (tcp|udp)_early_demux again. For now, the users of the .early_demux() handler are TCP and UDP only, and they are called directly to avoid retpoline. So, we can remove the .early_demux() handler from inet6?_protos and need not dereference them in ip6?_rcv_finish_core(). If another proto needs .early_demux(), we can restore it at that time. 
Fixes: dddb64bcb346 ("net: Add sysctl to toggle early demux for tcp and udp") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20220713175207.7727-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/protocol.h | 4 --- include/net/tcp.h | 2 +- include/net/udp.h | 1 + net/ipv4/af_inet.c | 14 ++------- net/ipv4/ip_input.c | 35 +++++++++++++--------- net/ipv4/sysctl_net_ipv4.c | 59 ++------------------------------------ net/ipv6/ip6_input.c | 26 ++++++++++------- net/ipv6/tcp_ipv6.c | 9 ++---- net/ipv6/udp.c | 9 ++---- 9 files changed, 46 insertions(+), 113 deletions(-) diff --git a/include/net/protocol.h b/include/net/protocol.h index 2b778e1d2d8f..0fd2df844fc7 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -35,8 +35,6 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb); - int (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); /* This returns an error if we weren't able to handle the error. */ @@ -53,8 +51,6 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { - void (*early_demux)(struct sk_buff *skb); - void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); /* This returns an error if we weren't able to handle the error. 
*/ diff --git a/include/net/tcp.h b/include/net/tcp.h index bf4af27f5620..9a8d98639b20 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -934,7 +934,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific; INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); -INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb)); +void tcp_v6_early_demux(struct sk_buff *skb); #endif diff --git a/include/net/udp.h b/include/net/udp.h index 010bc324f860..388e68c7bca0 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -176,6 +176,7 @@ INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, struct udphdr *uh, struct sock *sk); int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); +void udp_v6_early_demux(struct sk_buff *skb); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 87d73a3e92ba..48223c264991 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1726,12 +1726,7 @@ static const struct net_protocol igmp_protocol = { }; #endif -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct net_protocol tcp_protocol = { - .early_demux = tcp_v4_early_demux, - .early_demux_handler = tcp_v4_early_demux, +static const struct net_protocol tcp_protocol = { .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1, @@ -1739,12 +1734,7 @@ static struct net_protocol tcp_protocol = { .icmp_strict_tag_validation = 1, }; -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. 
- */ -static struct net_protocol udp_protocol = { - .early_demux = udp_v4_early_demux, - .early_demux_handler = udp_v4_early_demux, +static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b0c244af1e4d..f6b3237e88ca 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -309,14 +309,13 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, ip_hdr(hint)->tos == iph->tos; } -INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *)); +int tcp_v4_early_demux(struct sk_buff *skb); +int udp_v4_early_demux(struct sk_buff *skb); static int ip_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *dev, const struct sk_buff *hint) { const struct iphdr *iph = ip_hdr(skb); - int (*edemux)(struct sk_buff *skb); struct rtable *rt; int err; @@ -327,21 +326,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, goto drop_error; } - if (net->ipv4.sysctl_ip_early_demux && + if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && !skb_dst(skb) && !skb->sk && !ip_is_fragment(iph)) { - const struct net_protocol *ipprot; - int protocol = iph->protocol; + switch (iph->protocol) { + case IPPROTO_TCP: + if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) { + tcp_v4_early_demux(skb); - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { - err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux, - udp_v4_early_demux, skb); - if (unlikely(err)) - goto drop_error; - /* must reload iph, skb->head might have changed */ - iph = ip_hdr(skb); + /* must reload iph, skb->head might have changed */ + iph = ip_hdr(skb); + } + break; + case IPPROTO_UDP: + if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { + err = udp_v4_early_demux(skb); + if (unlikely(err)) + goto drop_error; + + /* 
must reload iph, skb->head might have changed */ + iph = ip_hdr(skb); + } + break; } } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 86f553864f98..439970e02ac6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -361,61 +361,6 @@ bad_key: return ret; } -static void proc_configure_early_demux(int enabled, int protocol) -{ - struct net_protocol *ipprot; -#if IS_ENABLED(CONFIG_IPV6) - struct inet6_protocol *ip6prot; -#endif - - rcu_read_lock(); - - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot) - ipprot->early_demux = enabled ? ipprot->early_demux_handler : - NULL; - -#if IS_ENABLED(CONFIG_IPV6) - ip6prot = rcu_dereference(inet6_protos[protocol]); - if (ip6prot) - ip6prot->early_demux = enabled ? ip6prot->early_demux_handler : - NULL; -#endif - rcu_read_unlock(); -} - -static int proc_tcp_early_demux(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = 0; - - ret = proc_dointvec(table, write, buffer, lenp, ppos); - - if (write && !ret) { - int enabled = init_net.ipv4.sysctl_tcp_early_demux; - - proc_configure_early_demux(enabled, IPPROTO_TCP); - } - - return ret; -} - -static int proc_udp_early_demux(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = 0; - - ret = proc_dointvec(table, write, buffer, lenp, ppos); - - if (write && !ret) { - int enabled = init_net.ipv4.sysctl_udp_early_demux; - - proc_configure_early_demux(enabled, IPPROTO_UDP); - } - - return ret; -} - static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -685,14 +630,14 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_udp_early_demux, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_udp_early_demux + .proc_handler = proc_douintvec_minmax, }, { .procname = "tcp_early_demux", .data = &init_net.ipv4.sysctl_tcp_early_demux, .maxlen = 
sizeof(int), .mode = 0644, - .proc_handler = proc_tcp_early_demux + .proc_handler = proc_douintvec_minmax, }, { .procname = "nexthop_compat_mode", diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 15ea3d082534..4eb9fbfdce33 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -44,21 +44,25 @@ #include #include -INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *)); +void udp_v6_early_demux(struct sk_buff *); +void tcp_v6_early_demux(struct sk_buff *); static void ip6_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb) { - void (*edemux)(struct sk_buff *skb); - - if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { - const struct inet6_protocol *ipprot; - - ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); - if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) - INDIRECT_CALL_2(edemux, tcp_v6_early_demux, - udp_v6_early_demux, skb); + if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && + !skb_dst(skb) && !skb->sk) { + switch (ipv6_hdr(skb)->nexthdr) { + case IPPROTO_TCP: + if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) + tcp_v6_early_demux(skb); + break; + case IPPROTO_UDP: + if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) + udp_v6_early_demux(skb); + break; + } } + if (!skb_valid_dst(skb)) ip6_route_input(skb); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c14eaec64a0b..a558dd9d177b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1818,7 +1818,7 @@ do_time_wait: goto discard_it; } -INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) +void tcp_v6_early_demux(struct sk_buff *skb) { const struct ipv6hdr *hdr; const struct tcphdr *th; @@ -2169,12 +2169,7 @@ struct proto tcpv6_prot = { }; EXPORT_SYMBOL_GPL(tcpv6_prot); -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. 
- */ -static struct inet6_protocol tcpv6_protocol = { - .early_demux = tcp_v6_early_demux, - .early_demux_handler = tcp_v6_early_demux, +static const struct inet6_protocol tcpv6_protocol = { .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 514e6a55959f..1805cc5f7418 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1027,7 +1027,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, return NULL; } -INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) +void udp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct udphdr *uh; @@ -1640,12 +1640,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, return ipv6_getsockopt(sk, level, optname, optval, optlen); } -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct inet6_protocol udpv6_protocol = { - .early_demux = udp_v6_early_demux, - .early_demux_handler = udp_v6_early_demux, +static const struct inet6_protocol udpv6_protocol = { .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, From bef08acbe560a926b4cee9cc46404cc98ae5703b Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Wed, 2 Nov 2022 15:29:54 +0800 Subject: [PATCH 0651/1477] tracing: kprobe: Fix memory leak in test_gen_kprobe/kretprobe_cmd() commit 66f0919c953ef7b55e5ab94389a013da2ce80a2c upstream. test_gen_kprobe_cmd() only free buf in fail path, hence buf will leak when there is no failure. Move kfree(buf) from fail path to common path to prevent the memleak. The same reason and solution in test_gen_kretprobe_cmd(). 
unreferenced object 0xffff888143b14000 (size 2048): comm "insmod", pid 52490, jiffies 4301890980 (age 40.553s) hex dump (first 32 bytes): 70 3a 6b 70 72 6f 62 65 73 2f 67 65 6e 5f 6b 70 p:kprobes/gen_kp 72 6f 62 65 5f 74 65 73 74 20 64 6f 5f 73 79 73 robe_test do_sys backtrace: [<000000006d7b836b>] kmalloc_trace+0x27/0xa0 [<0000000009528b5b>] 0xffffffffa059006f [<000000008408b580>] do_one_initcall+0x87/0x2a0 [<00000000c4980a7e>] do_init_module+0xdf/0x320 [<00000000d775aad0>] load_module+0x3006/0x3390 [<00000000e9a74b80>] __do_sys_finit_module+0x113/0x1b0 [<000000003726480d>] do_syscall_64+0x35/0x80 [<000000003441e93b>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Link: https://lore.kernel.org/all/20221102072954.26555-1-shangxiaojing@huawei.com/ Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module") Cc: stable@vger.kernel.org Signed-off-by: Shang XiaoJing Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/kprobe_event_gen_test.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index 80e04a1e1977..d81f7c51025c 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -100,20 +100,20 @@ static int __init test_gen_kprobe_cmd(void) KPROBE_GEN_TEST_FUNC, KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1); if (ret) - goto free; + goto out; /* Use kprobe_event_add_fields to add the rest of the fields */ ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3); if (ret) - goto free; + goto out; /* * This actually creates the event. */ ret = kprobe_event_gen_cmd_end(&cmd); if (ret) - goto free; + goto out; /* * Now get the gen_kprobe_test event file. 
We need to prevent @@ -136,13 +136,11 @@ static int __init test_gen_kprobe_cmd(void) goto delete; } out: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kprobe_test"); - free: - kfree(buf); - goto out; } @@ -170,14 +168,14 @@ static int __init test_gen_kretprobe_cmd(void) KPROBE_GEN_TEST_FUNC, "$retval"); if (ret) - goto free; + goto out; /* * This actually creates the event. */ ret = kretprobe_event_gen_cmd_end(&cmd); if (ret) - goto free; + goto out; /* * Now get the gen_kretprobe_test event file. We need to @@ -201,13 +199,11 @@ static int __init test_gen_kretprobe_cmd(void) goto delete; } out: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kretprobe_test"); - free: - kfree(buf); - goto out; } From f100a02748613c108481b1d3baaa15b60494e4cc Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Fri, 4 Nov 2022 08:49:31 +0900 Subject: [PATCH 0652/1477] kprobe: reverse kp->flags when arm_kprobe failed commit 4a6f316d6855a434f56dbbeba05e14c01acde8f8 upstream. In the aggregate kprobe case, when arm_kprobe() fails, we need to set KPROBE_FLAG_DISABLED in kp->flags again. Otherwise, the 'kp' kprobe will be considered enabled even though it is actually not enabled. 
Link: https://lore.kernel.org/all/20220902155820.34755-1-liq3ea@163.com/ Fixes: 12310e343755 ("kprobes: Propagate error from arm_kprobe_ftrace()") Cc: stable@vger.kernel.org Signed-off-by: Li Qiang Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index dac82a0e7c0b..b0f444e86487 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2335,8 +2335,11 @@ int enable_kprobe(struct kprobe *kp) if (!kprobes_all_disarmed && kprobe_disabled(p)) { p->flags &= ~KPROBE_FLAG_DISABLED; ret = arm_kprobe(p); - if (ret) + if (ret) { p->flags |= KPROBE_FLAG_DISABLED; + if (p != kp) + kp->flags |= KPROBE_FLAG_DISABLED; + } } out: mutex_unlock(&kprobe_mutex); From 533bfacbacb86f1e415d09e7a602de871a16a90e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 21 Oct 2022 08:01:53 +0200 Subject: [PATCH 0653/1477] tools/nolibc/string: Fix memcmp() implementation commit b3f4f51ea68a495f8a5956064c33dce711a2df91 upstream. The C standard says that memcmp() must treat the buffers as consisting of "unsigned chars". If char happens to be unsigned, the casts are ok, but then obviously the c1 variable can never contain a negative value. And when char is signed, the casts are wrong, and there's still a problem with using an 8-bit quantity to hold the difference, because that can range from -255 to +255. For example, assuming char is signed, comparing two 1-byte buffers, one containing 0x00 and another 0x80, the current implementation would return -128 for both memcmp(a, b, 1) and memcmp(b, a, 1), whereas one of those should of course return something positive. Signed-off-by: Rasmus Villemoes Fixes: 66b6f755ad45 ("rcutorture: Import a copy of nolibc") Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Willy Tarreau Signed-off-by: Paul E. 
McKenney Signed-off-by: Greg Kroah-Hartman --- tools/include/nolibc/nolibc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index b8cecb66d28b..c20d2fe7ceba 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -2318,9 +2318,9 @@ static __attribute__((unused)) int memcmp(const void *s1, const void *s2, size_t n) { size_t ofs = 0; - char c1 = 0; + int c1 = 0; - while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) { + while (ofs < n && !(c1 = ((unsigned char *)s1)[ofs] - ((unsigned char *)s2)[ofs])) { ofs++; } return c1; From ff32d8a099dcac080ede4f0edfece75254d26a19 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Mon, 17 Oct 2022 10:38:06 +0000 Subject: [PATCH 0654/1477] tracing/histogram: Update document for KEYS_MAX size commit a635beeacc6d56d2b71c39e6c0103f85b53d108e upstream. After commit 4f36c2d85ced ("tracing: Increase tracing map KEYS_MAX size"), 'keys' supports up to three fields. Signed-off-by: Zheng Yejian Cc: stable@vger.kernel.org Acked-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20221017103806.2479139-1-zhengyejian1@huawei.com Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- Documentation/trace/histogram.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index f99be8062bc8..a9ffc4f3ee69 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -39,7 +39,7 @@ Documentation written by Tom Zanussi will use the event's kernel stacktrace as the key. The keywords 'keys' or 'key' can be used to specify keys, and the keywords 'values', 'vals', or 'val' can be used to specify values. Compound - keys consisting of up to two fields can be specified by the 'keys' + keys consisting of up to three fields can be specified by the 'keys' keyword. 
Hashing a compound key produces a unique entry in the table for each unique combination of component keys, and can be useful for providing more fine-grained summaries of event data. From cdf01c807e974048c43c7fd3ca574f6086a57906 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Tue, 25 Oct 2022 21:33:57 +0800 Subject: [PATCH 0655/1477] capabilities: fix potential memleak on error path from vfs_getxattr_alloc() commit 8cf0a1bc12870d148ae830a4ba88cfdf0e879cee upstream. In cap_inode_getsecurity(), we will use vfs_getxattr_alloc() to complete the memory allocation of tmpbuf, if we have completed the memory allocation of tmpbuf, but failed to call handler->get(...), there will be a memleak in below logic: |-- ret = (int)vfs_getxattr_alloc(mnt_userns, ...) | /* ^^^ alloc for tmpbuf */ |-- value = krealloc(*xattr_value, error + 1, flags) | /* ^^^ alloc memory */ |-- error = handler->get(handler, ...) | /* error! */ |-- *xattr_value = value | /* xattr_value is &tmpbuf (memory leak!) */ So we will try to free(tmpbuf) after vfs_getxattr_alloc() fails to fix it. 
Cc: stable@vger.kernel.org Fixes: 8db6c34f1dbc ("Introduce v3 namespaced file capabilities") Signed-off-by: Gaosheng Cui Acked-by: Serge Hallyn [PM: subject line and backtrace tweaks] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/commoncap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/commoncap.c b/security/commoncap.c index 28d582ed80c9..b44b69796c0b 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -391,8 +391,10 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, &tmpbuf, size, GFP_NOFS); dput(dentry); - if (ret < 0 || !tmpbuf) - return ret; + if (ret < 0 || !tmpbuf) { + size = ret; + goto out_free; + } fs_ns = inode->i_sb->s_user_ns; cap = (struct vfs_cap_data *) tmpbuf; From f8e8cda869fd0a023e22c1fa98906d1a61bd152a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Oct 2022 14:25:20 +0200 Subject: [PATCH 0656/1477] fuse: add file_modified() to fallocate commit 4a6f278d4827b59ba26ceae0ff4529ee826aa258 upstream. Add missing file_modified() call to fuse_file_fallocate(). Without this fallocate on fuse failed to clear privileges. Fixes: 05ba1f082300 ("fuse: add FALLOCATE operation") Cc: Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d1bc96ee6eb3..253308fcb047 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3311,6 +3311,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, goto out; } + err = file_modified(file); + if (err) + goto out; + if (!(mode & FALLOC_FL_KEEP_SIZE)) set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); From 83294f7c7759e1e4c3909edc2e2c1dfbb159e153 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Oct 2022 10:39:08 +0200 Subject: [PATCH 0657/1477] efi: random: reduce seed size to 32 bytes commit 161a438d730dade2ba2b1bf8785f0759aba4ca5f upstream. 
We no longer need at least 64 bytes of random seed to permit the early crng init to complete. The RNG is now based on Blake2s, so reduce the EFI seed size to the Blake2s hash size, which is sufficient for our purposes. While at it, drop the READ_ONCE(), which was supposed to prevent size from being evaluated after seed was unmapped. However, this cannot actually happen, so READ_ONCE() is unnecessary here. Cc: # v4.14+ Signed-off-by: Ard Biesheuvel Reviewed-by: Jason A. Donenfeld Acked-by: Ilias Apalodimas Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efi.c | 2 +- include/linux/efi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index e3df82d5d37a..70be9c87fb67 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -590,7 +590,7 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables, seed = early_memremap(efi_rng_seed, sizeof(*seed)); if (seed != NULL) { - size = READ_ONCE(seed->size); + size = min(seed->size, EFI_RANDOM_SEED_SIZE); early_memunmap(seed, sizeof(*seed)); } else { pr_err("Could not map UEFI random seed!\n"); diff --git a/include/linux/efi.h b/include/linux/efi.h index 3bac68fb7ff1..7feb70d32d95 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1161,7 +1161,7 @@ void efi_retrieve_tpm2_eventlog(void); arch_efi_call_virt_teardown(); \ }) -#define EFI_RANDOM_SEED_SIZE 64U +#define EFI_RANDOM_SEED_SIZE 32U // BLAKE2S_HASH_SIZE struct linux_efi_random_seed { u32 size; From 3be2d66822a07183d28c78cd8261774e2a3cca3a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Oct 2022 10:39:09 +0200 Subject: [PATCH 0658/1477] efi: random: Use 'ACPI reclaim' memory for random seed commit 7d866e38c7e9ece8a096d0d098fa9d92b9d4f97e upstream. 
EFI runtime services data is guaranteed to be preserved by the OS, making it a suitable candidate for the EFI random seed table, which may be passed to kexec kernels as well (after refreshing the seed), and so we need to ensure that the memory is preserved without support from the OS itself. However, runtime services data is intended for allocations that are relevant to the implementations of the runtime services themselves, and so they are unmapped from the kernel linear map, and mapped into the EFI page tables that are active while runtime service invocations are in progress. None of this is needed for the RNG seed. So let's switch to EFI 'ACPI reclaim' memory: in spite of the name, there is nothing exclusively ACPI about it, it is simply a type of allocation that carries firmware provided data which may or may not be relevant to the OS, and it is left up to the OS to decide whether to reclaim it after having consumed its contents. Given that in Linux, we never reclaim these allocations, it is a good choice for the EFI RNG seed, as the allocation is guaranteed to survive kexec reboots. One additional reason for changing this now is to align it with the upcoming recommendation for EFI bootloader provided RNG seeds, which must not use EFI runtime services code/data allocations. 
Cc: # v4.14+ Signed-off-by: Ard Biesheuvel Reviewed-by: Ilias Apalodimas Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/random.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 24aa37535372..33ab56769595 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -75,7 +75,12 @@ efi_status_t efi_random_get_seed(void) if (status != EFI_SUCCESS) return status; - status = efi_bs_call(allocate_pool, EFI_RUNTIME_SERVICES_DATA, + /* + * Use EFI_ACPI_RECLAIM_MEMORY here so that it is guaranteed that the + * allocation will survive a kexec reboot (although we refresh the seed + * beforehand) + */ + status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, sizeof(*seed) + EFI_RANDOM_SEED_SIZE, (void **)&seed); if (status != EFI_SUCCESS) From 98f6e7c337036dff5a851f557950f5846d3f7d73 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 31 Oct 2022 08:41:18 -0700 Subject: [PATCH 0659/1477] perf/x86/intel: Fix pebs event constraints for ICL commit acc5568b90c19ac6375508a93b9676cd18a92a35 upstream. According to the latest event list, update the MEM_INST_RETIRED events which support the DataLA facility. 
Fixes: 6017608936c1 ("perf/x86/intel: Add Icelake support") Reported-by: Jannis Klinkenberg Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221031154119.571386-1-kan.liang@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/ds.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 945d470f62d0..48f30ffef1f4 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -855,8 +855,13 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */ INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */ From 29d106d086d2f39f8635f402004d1c17a6205583 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 31 Oct 2022 08:45:50 -0700 Subject: [PATCH 0660/1477] perf/x86/intel: Add Cooper Lake stepping to isolation_ucodes[] commit 6f8faf471446844bb9c318e0340221049d5c19f4 upstream. 
The intel_pebs_isolation quirk checks both model number and stepping. Cooper Lake has a different stepping (11) than the other Skylake Xeon. It cannot benefit from the optimization in commit 9b545c04abd4f ("perf/x86/kvm: Avoid unnecessary work in guest filtering"). Add the stepping of Cooper Lake into the isolation_ucodes[] table. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221031154550.571663-1-kan.liang@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f6eadf9320a1..990d5543e3bf 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4412,6 +4412,7 @@ static const struct x86_cpu_desc isolation_ucodes[] = { INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), From 814af9a32b031a09131e9a57bfc9710a5d742ed9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 21 Oct 2022 07:44:49 +0200 Subject: [PATCH 0661/1477] parisc: Make 8250_gsc driver dependent on CONFIG_PARISC commit e8a18e3f00f3ee8d07c17ab1ea3ad4df4a3b6fe0 upstream. Although the name of the driver 8250_gsc.c suggests that it handles only serial ports on the GSC bus, it does handle serial ports listed in the parisc machine inventory as well, e.g. the serial ports in a C8000 PCI-only workstation. Change the dependency to CONFIG_PARISC, so that the driver gets included in the kernel even if CONFIG_GSC isn't set. 
Reported-by: Mikulas Patocka Cc: Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index 603137da4736..136f2b1460f9 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -119,7 +119,7 @@ config SERIAL_8250_CONSOLE config SERIAL_8250_GSC tristate - depends on SERIAL_8250 && GSC + depends on SERIAL_8250 && PARISC default SERIAL_8250 config SERIAL_8250_DMA From 98f836e80d210312ec98ffbbce4a0b6c4b6dc34e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 27 Oct 2022 09:12:05 +0200 Subject: [PATCH 0662/1477] parisc: Export iosapic_serial_irq() symbol for serial port driver commit a0c9f1f2e53b8eb2ae43987a30e547ba56b4fa18 upstream. The parisc serial port driver needs this symbol when it's compiled as module. Signed-off-by: Helge Deller Reported-by: kernel test robot Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/iosapic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c index 8a3b0c3a1e92..fd99735dca3e 100644 --- a/drivers/parisc/iosapic.c +++ b/drivers/parisc/iosapic.c @@ -875,6 +875,7 @@ int iosapic_serial_irq(struct parisc_device *dev) return vi->txn_irq; } +EXPORT_SYMBOL(iosapic_serial_irq); #endif From c9598cf629530823fd5d6c3342311895de504707 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 28 Oct 2022 18:12:49 +0200 Subject: [PATCH 0663/1477] parisc: Avoid printing the hardware path twice commit 2b6ae0962b421103feb41a80406732944b0665b3 upstream. Avoid that the hardware path is shown twice in the kernel log, and clean up the output of the version numbers to show up in the same order as they are listed in the hardware database in the hardware.c file. Additionally, optimize the memory footprint of the hardware database and mark some code as init code. 
Fixes: cab56b51ec0e ("parisc: Fix device names in /proc/iomem") Signed-off-by: Helge Deller Cc: # v4.9+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/hardware.h | 12 ++++++------ arch/parisc/kernel/drivers.c | 14 ++++++-------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/parisc/include/asm/hardware.h b/arch/parisc/include/asm/hardware.h index 9d3d7737c58b..a005ebc54779 100644 --- a/arch/parisc/include/asm/hardware.h +++ b/arch/parisc/include/asm/hardware.h @@ -10,12 +10,12 @@ #define SVERSION_ANY_ID PA_SVERSION_ANY_ID struct hp_hardware { - unsigned short hw_type:5; /* HPHW_xxx */ - unsigned short hversion; - unsigned long sversion:28; - unsigned short opt; - const char name[80]; /* The hardware description */ -}; + unsigned int hw_type:8; /* HPHW_xxx */ + unsigned int hversion:12; + unsigned int sversion:12; + unsigned char opt; + unsigned char name[59]; /* The hardware description */ +} __packed; struct parisc_device; diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index f5a25ed0930d..d95157488832 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -883,15 +883,13 @@ void __init walk_central_bus(void) &root); } -static void print_parisc_device(struct parisc_device *dev) +static __init void print_parisc_device(struct parisc_device *dev) { - char hw_path[64]; - static int count; + static int count __initdata; - print_pa_hwpath(dev, hw_path); - pr_info("%d. %s at %pap [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", - ++count, dev->name, &(dev->hpa.start), hw_path, dev->id.hw_type, - dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); + pr_info("%d. 
%s at %pap { type:%d, hv:%#x, sv:%#x, rev:%#x }", + ++count, dev->name, &(dev->hpa.start), dev->id.hw_type, + dev->id.hversion, dev->id.sversion, dev->id.hversion_rev); if (dev->num_addrs) { int k; @@ -1080,7 +1078,7 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) -static int print_one_device(struct device * dev, void * data) +static __init int print_one_device(struct device * dev, void * data) { struct parisc_device * pdev = to_parisc_device(dev); From 5370b965b7a945bb8f48b9ee23d83a76a947902e Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 18 Oct 2022 10:27:01 +0800 Subject: [PATCH 0664/1477] ext4: fix warning in 'ext4_da_release_space' commit 1b8f787ef547230a3249bcf897221ef0cc78481b upstream. Syzkaller report issue as follows: EXT4-fs (loop0): Free/Dirty block details EXT4-fs (loop0): free_blocks=0 EXT4-fs (loop0): dirty_blocks=0 EXT4-fs (loop0): Block reservation details EXT4-fs (loop0): i_reserved_data_blocks=0 EXT4-fs warning (device loop0): ext4_da_release_space:1527: ext4_da_release_space: ino 18, to_free 1 with only 0 reserved data blocks ------------[ cut here ]------------ WARNING: CPU: 0 PID: 92 at fs/ext4/inode.c:1528 ext4_da_release_space+0x25e/0x370 fs/ext4/inode.c:1524 Modules linked in: CPU: 0 PID: 92 Comm: kworker/u4:4 Not tainted 6.0.0-syzkaller-09423-g493ffd6605b2 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 Workqueue: writeback wb_workfn (flush-7:0) RIP: 0010:ext4_da_release_space+0x25e/0x370 fs/ext4/inode.c:1528 RSP: 0018:ffffc900015f6c90 EFLAGS: 00010296 RAX: 42215896cd52ea00 RBX: 0000000000000000 RCX: 42215896cd52ea00 RDX: 0000000000000000 RSI: 0000000080000001 RDI: 0000000000000000 RBP: 1ffff1100e907d96 R08: ffffffff816aa79d R09: fffff520002bece5 R10: fffff520002bece5 R11: 1ffff920002bece4 R12: ffff888021fd2000 R13: ffff88807483ecb0 R14: 0000000000000001 R15: ffff88807483e740 FS: 0000000000000000(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 
0000 ES: 0000 CR0: 0000000080050033 CR2: 00005555569ba628 CR3: 000000000c88e000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ext4_es_remove_extent+0x1ab/0x260 fs/ext4/extents_status.c:1461 mpage_release_unused_pages+0x24d/0xef0 fs/ext4/inode.c:1589 ext4_writepages+0x12eb/0x3be0 fs/ext4/inode.c:2852 do_writepages+0x3c3/0x680 mm/page-writeback.c:2469 __writeback_single_inode+0xd1/0x670 fs/fs-writeback.c:1587 writeback_sb_inodes+0xb3b/0x18f0 fs/fs-writeback.c:1870 wb_writeback+0x41f/0x7b0 fs/fs-writeback.c:2044 wb_do_writeback fs/fs-writeback.c:2187 [inline] wb_workfn+0x3cb/0xef0 fs/fs-writeback.c:2227 process_one_work+0x877/0xdb0 kernel/workqueue.c:2289 worker_thread+0xb14/0x1330 kernel/workqueue.c:2436 kthread+0x266/0x300 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 Above issue may happens as follows: ext4_da_write_begin ext4_create_inline_data ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA); __ext4_ioctl ext4_ext_migrate -> will lead to eh->eh_entries not zero, and set extent flag ext4_da_write_begin ext4_da_convert_inline_data_to_extent ext4_da_write_inline_data_begin ext4_da_map_blocks ext4_insert_delayed_block if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) if (!ext4_es_scan_clu(inode, &ext4_es_is_mapped, lblk)) ext4_clu_mapped(inode, EXT4_B2C(sbi, lblk)); -> will return 1 allocated = true; ext4_es_insert_delayed_block(inode, lblk, allocated); ext4_writepages mpage_map_and_submit_extent(handle, &mpd, &give_up_on_write); -> return -ENOSPC mpage_release_unused_pages(&mpd, give_up_on_write); -> give_up_on_write == 1 ext4_es_remove_extent ext4_da_release_space(inode, reserved); if (unlikely(to_free > ei->i_reserved_data_blocks)) -> to_free == 1 but ei->i_reserved_data_blocks == 0 -> then trigger warning as above To solve above issue, forbid inode do migrate 
which has inline data. Cc: stable@kernel.org Reported-by: syzbot+c740bb18df70ad00952e@syzkaller.appspotmail.com Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221018022701.683489-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/migrate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 04320715d61f..4bfe2252d9a4 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -425,7 +425,8 @@ int ext4_ext_migrate(struct inode *inode) * already is extent-based, error out. */ if (!ext4_has_feature_extents(inode->i_sb) || - (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) + ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || + ext4_has_inline_data(inode)) return -EINVAL; if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) From 156451a67b93986fb07c274ef6995ff40766c5ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= Date: Wed, 12 Oct 2022 14:13:30 +0100 Subject: [PATCH 0665/1477] ext4: fix BUG_ON() when directory entry has invalid rec_len MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 17a0bc9bd697f75cfdf9b378d5eb2d7409c91340 upstream. The rec_len field in the directory entry has to be a multiple of 4. A corrupted filesystem image can be used to hit a BUG() in ext4_rec_len_to_disk(), called from make_indexed_dir(). ------------[ cut here ]------------ kernel BUG at fs/ext4/ext4.h:2413! ... RIP: 0010:make_indexed_dir+0x53f/0x5f0 ... Call Trace: ? add_dirent_to_buf+0x1b2/0x200 ext4_add_entry+0x36e/0x480 ext4_add_nondir+0x2b/0xc0 ext4_create+0x163/0x200 path_openat+0x635/0xe90 do_filp_open+0xb4/0x160 ? __create_object.isra.0+0x1de/0x3b0 ? 
_raw_spin_unlock+0x12/0x30 do_sys_openat2+0x91/0x150 __x64_sys_open+0x6c/0xa0 do_syscall_64+0x3c/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The fix simply adds a call to ext4_check_dir_entry() to validate the directory entry, returning -EFSCORRUPTED if the entry is invalid. CC: stable@kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=216540 Signed-off-by: Luís Henriques Link: https://lore.kernel.org/r/20221012131330.32456-1-lhenriques@suse.de Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 646cc1935dff..b2e131d11cf8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2153,8 +2153,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, memcpy(data2, de, len); de = (struct ext4_dir_entry_2 *) data2; top = data2 + len; - while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) + while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) { + if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len, + (data2 + (blocksize - csum_size) - + (char *) de))) { + brelse(bh2); + brelse(bh); + return -EFSCORRUPTED; + } de = de2; + } de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - (char *) de, blocksize); From bd64a88f364cbe2e19b5f55be1cffef2b47bd0a5 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 29 Sep 2022 15:51:59 -0700 Subject: [PATCH 0666/1477] KVM: x86: Mask off reserved bits in CPUID.80000006H commit eeb69eab57c6604ac90b3fd8e5ac43f24a5535b1 upstream. KVM_GET_SUPPORTED_CPUID should only enumerate features that KVM actually supports. CPUID.80000006H:EDX[17:16] are reserved bits and should be masked off. 
Fixes: 43d05de2bee7 ("KVM: pass through CPUID(0x80000006)") Signed-off-by: Jim Mattson Message-Id: <20220929225203.2234702-2-jmattson@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6f44274aa949..f5870217a472 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -817,7 +817,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) cpuid_entry_override(entry, CPUID_8000_0001_ECX); break; case 0x80000006: - /* L2 cache and TLB: pass through host info. */ + /* Drop reserved bits, pass host L2 cache and TLB info. */ + entry->edx &= ~GENMASK(17, 16); break; case 0x80000007: /* Advanced power management */ /* invariant TSC is CPUID.80000007H:EDX[8] */ From cc40c5f3e9211523873c001d34e2f6695d57249b Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 29 Sep 2022 15:52:01 -0700 Subject: [PATCH 0667/1477] KVM: x86: Mask off reserved bits in CPUID.8000001AH commit 079f6889818dd07903fb36c252532ab47ebb6d48 upstream. KVM_GET_SUPPORTED_CPUID should only enumerate features that KVM actually supports. In the case of CPUID.8000001AH, only three bits are currently defined. The 125 reserved bits should be masked off. 
Fixes: 24c82e576b78 ("KVM: Sanitize cpuid") Signed-off-by: Jim Mattson Message-Id: <20220929225203.2234702-4-jmattson@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index f5870217a472..a4c6973c8b7b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -860,6 +860,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ecx = entry->edx = 0; break; case 0x8000001a: + entry->eax &= GENMASK(2, 0); + entry->ebx = entry->ecx = entry->edx = 0; + break; case 0x8000001e: break; /* Support memory encryption cpuid if host supports it */ From 9302ebc1c21d93ef7004cd228c8b493dc64f7194 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 29 Sep 2022 15:52:00 -0700 Subject: [PATCH 0668/1477] KVM: x86: Mask off reserved bits in CPUID.80000008H commit 7030d8530e533844e2f4b0e7476498afcd324634 upstream. KVM_GET_SUPPORTED_CPUID should only enumerate features that KVM actually supports. The following ranges of CPUID.80000008H are reserved and should be masked off: ECX[31:18] ECX[11:8] In addition, the PerfTscSize field at ECX[17:16] should also be zero because KVM does not set the PERFTSC bit at CPUID.80000001H.ECX[27]. 
Fixes: 24c82e576b78 ("KVM: Sanitize cpuid") Signed-off-by: Jim Mattson Message-Id: <20220929225203.2234702-3-jmattson@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a4c6973c8b7b..b4f6d81c873f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -841,6 +841,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); + entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8)); entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0008_EBX); break; From e0c7410378cd1adbfd35a65faca5df8936631b1a Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 30 Sep 2022 00:51:58 +0200 Subject: [PATCH 0669/1477] KVM: x86: Mask off reserved bits in CPUID.80000001H commit 0469e56a14bf8cfb80507e51b7aeec0332cdbc13 upstream. KVM_GET_SUPPORTED_CPUID should only enumerate features that KVM actually supports. CPUID.80000001:EBX[27:16] are reserved bits and should be masked off. 
Fixes: 0771671749b5 ("KVM: Enhance guest cpuid management") Signed-off-by: Jim Mattson Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b4f6d81c873f..06a776fdb90c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -813,6 +813,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->eax = min(entry->eax, 0x8000001f); break; case 0x80000001: + entry->ebx &= ~GENMASK(27, 16); cpuid_entry_override(entry, CPUID_8000_0001_EDX); cpuid_entry_override(entry, CPUID_8000_0001_ECX); break; From c8a2fd7a715d9882eed5ddb4105bce2f6a8163c2 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:28 +0300 Subject: [PATCH 0670/1477] KVM: x86: emulator: em_sysexit should update ctxt->mode commit 5015bb89b58225f97df6ac44383e7e8c8662c8c9 upstream. SYSEXIT is one of the instructions that can change the processor mode, thus ctxt->mode should be updated after it. Note that this is likely a benign bug, because the only problematic mode change is from 32 bit to 64 bit which can lead to truncation of RIP, and it is not possible to do with sysexit, since sysexit running in 32 bit mode will be limited to 32 bit version. 
Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-11-mlevitsk@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 52a881d24070..8404d5d00bf3 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2957,6 +2957,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); ctxt->_eip = rdx; + ctxt->mode = usermode; *reg_write(ctxt, VCPU_REGS_RSP) = rcx; return X86EMUL_CONTINUE; From ce9261accccd2a0edffa9b65cd9d23400f71bdfa Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:29 +0300 Subject: [PATCH 0671/1477] KVM: x86: emulator: introduce emulator_recalc_and_set_mode commit d087e0f79fa0dd336a9a6b2f79ec23120f5eff73 upstream. Some instructions update the cpu execution mode, which needs to update the emulation mode. Extract this code, and make assign_eip_far use it. assign_eip_far now reads CS, instead of getting it via a parameter, which is ok, because callers always assign CS to the same value before calling this function. No functional change is intended. 
Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-12-mlevitsk@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 87 ++++++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 29 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8404d5d00bf3..76230e36c1b7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -796,8 +796,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt, ctxt->mode, linear); } -static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, - enum x86emul_mode mode) +static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst) { ulong linear; int rc; @@ -807,41 +806,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, if (ctxt->op_bytes != sizeof(unsigned long)) addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1); - rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear); + rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear); if (rc == X86EMUL_CONTINUE) ctxt->_eip = addr.ea; return rc; } -static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) +static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt) { - return assign_eip(ctxt, dst, ctxt->mode); + u64 efer; + struct desc_struct cs; + u16 selector; + u32 base3; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + + if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) { + /* Real mode. cpu must not have long mode active */ + if (efer & EFER_LMA) + return X86EMUL_UNHANDLEABLE; + ctxt->mode = X86EMUL_MODE_REAL; + return X86EMUL_CONTINUE; + } + + if (ctxt->eflags & X86_EFLAGS_VM) { + /* Protected/VM86 mode. 
cpu must not have long mode active */ + if (efer & EFER_LMA) + return X86EMUL_UNHANDLEABLE; + ctxt->mode = X86EMUL_MODE_VM86; + return X86EMUL_CONTINUE; + } + + if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS)) + return X86EMUL_UNHANDLEABLE; + + if (efer & EFER_LMA) { + if (cs.l) { + /* Proper long mode */ + ctxt->mode = X86EMUL_MODE_PROT64; + } else if (cs.d) { + /* 32 bit compatibility mode*/ + ctxt->mode = X86EMUL_MODE_PROT32; + } else { + ctxt->mode = X86EMUL_MODE_PROT16; + } + } else { + /* Legacy 32 bit / 16 bit mode */ + ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + } + + return X86EMUL_CONTINUE; } -static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, - const struct desc_struct *cs_desc) +static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) { - enum x86emul_mode mode = ctxt->mode; - int rc; + return assign_eip(ctxt, dst); +} -#ifdef CONFIG_X86_64 - if (ctxt->mode >= X86EMUL_MODE_PROT16) { - if (cs_desc->l) { - u64 efer = 0; +static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst) +{ + int rc = emulator_recalc_and_set_mode(ctxt); - ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); - if (efer & EFER_LMA) - mode = X86EMUL_MODE_PROT64; - } else - mode = X86EMUL_MODE_PROT32; /* temporary value */ - } -#endif - if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) - mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; - rc = assign_eip(ctxt, dst, mode); - if (rc == X86EMUL_CONTINUE) - ctxt->mode = mode; - return rc; + if (rc != X86EMUL_CONTINUE) + return rc; + + return assign_eip(ctxt, dst); } static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) @@ -2256,7 +2285,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); + rc = assign_eip_far(ctxt, ctxt->src.val); /* Error handling is not implemented. 
*/ if (rc != X86EMUL_CONTINUE) return X86EMUL_UNHANDLEABLE; @@ -2337,7 +2366,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) &new_desc); if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, eip, &new_desc); + rc = assign_eip_far(ctxt, eip); /* Error handling is not implemented. */ if (rc != X86EMUL_CONTINUE) return X86EMUL_UNHANDLEABLE; @@ -3554,7 +3583,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); + rc = assign_eip_far(ctxt, ctxt->src.val); if (rc != X86EMUL_CONTINUE) goto fail; From e5cef906cb409a7b56641561a715d431b034d1c2 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:31 +0300 Subject: [PATCH 0672/1477] KVM: x86: emulator: update the emulation mode after CR0 write commit ad8f9e69942c7db90758d9d774157e53bce94840 upstream. Update the emulation mode when handling writes to CR0, because toggling CR0.PE switches between Real and Protected Mode, and toggling CR0.PG when EFER.LME=1 switches between Long and Protected Mode. This is likely a benign bug because there is no writeback of state, other than the RIP increment, and when toggling CR0.PE, the CPU has to execute code from a very low memory address. 
Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-14-mlevitsk@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 76230e36c1b7..63efccc8f429 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3725,11 +3725,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) static int em_cr_write(struct x86_emulate_ctxt *ctxt) { - if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) + int cr_num = ctxt->modrm_reg; + int r; + + if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val)) return emulate_gp(ctxt, 0); /* Disable writeback. */ ctxt->dst.type = OP_NONE; + + if (cr_num == 0) { + /* + * CR0 write might have updated CR0.PE and/or CR0.PG + * which can affect the cpu's execution mode. + */ + r = emulator_recalc_and_set_mode(ctxt); + if (r != X86EMUL_CONTINUE) + return r; + } + return X86EMUL_CONTINUE; } From 23f1fc7ce55fbfb195db8abd5e07eef13700cd0e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 12 Oct 2022 20:34:19 +0100 Subject: [PATCH 0673/1477] ext4,f2fs: fix readahead of verity data commit 4fa0e3ff217f775cb58d2d6d51820ec519243fb9 upstream. The recent change of page_cache_ra_unbounded() arguments was buggy in the two callers, causing us to readahead the wrong pages. Move the definition of ractl down to after the index is set correctly. This affected performance on configurations that use fs-verity. 
Link: https://lkml.kernel.org/r/20221012193419.1453558-1-willy@infradead.org Fixes: 73bb49da50cd ("mm/readahead: make page_cache_ra_unbounded take a readahead_control") Signed-off-by: Matthew Wilcox (Oracle) Reported-by: Jintao Yin Signed-off-by: Andrew Morton Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/ext4/verity.c | 3 ++- fs/f2fs/verity.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 00e3cbde472e..35be8e7ec2a0 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -370,13 +370,14 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index); struct page *page; index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); if (!page || !PageUptodate(page)) { + DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index); + if (page) put_page(page); else if (num_ra_pages > 1) diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 15ba36926fad..cff94d095d0f 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -261,13 +261,14 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index); struct page *page; index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); if (!page || !PageUptodate(page)) { + DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index); + if (page) put_page(page); else if (num_ra_pages > 1) From 9f3b8678080a5bf0135b13f9c489faa509cbb1c7 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 19 Oct 2022 17:03:49 -0700 Subject: [PATCH 0674/1477] drm/rockchip: dsi: Force synchronous probe commit 81e592f86f7afdb76d655e7fbd7803d7b8f985d8 upstream. 
We can't safely probe a dual-DSI display asynchronously (driver_async_probe='*' or driver_async_probe='dw-mipi-dsi-rockchip' cmdline), because dw_mipi_dsi_rockchip_find_second() pokes one DSI device's drvdata from the other device without any locking. Request synchronous probe, at least until this driver learns some appropriate locking for dual-DSI initialization. Cc: Signed-off-by: Brian Norris Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221019170255.2.I6b985b0ca372b7e35c6d9ea970b24bcb262d4fc1@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index b0fb3c3cba59..c51be1c9c207 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -1286,5 +1286,11 @@ struct platform_driver dw_mipi_dsi_rockchip_driver = { .of_match_table = dw_mipi_dsi_rockchip_dt_ids, .pm = &dw_mipi_dsi_rockchip_pm_ops, .name = "dw-mipi-dsi-rockchip", + /* + * For dual-DSI display, one DSI pokes at the other DSI's + * drvdata in dw_mipi_dsi_rockchip_find_second(). This is not + * safe for asynchronous probe. + */ + .probe_type = PROBE_FORCE_SYNCHRONOUS, }, }; From b86830cc95af9bc0ba27041f163ce61a03bcded2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2022 13:11:27 +0300 Subject: [PATCH 0675/1477] drm/i915/sdvo: Filter out invalid outputs more sensibly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3e206b6aa6df7eed4297577e0cf8403169b800a2 upstream. We try to filter out the corresponding xxx1 output if the xxx0 output is not present. But the way that is being done is pretty awkward. Make it less so. 
Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221026101134.20865-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit cc1e66394daaa7e9f005e2487a84e34a39f9308b) Signed-off-by: Tvrtko Ursulin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_sdvo.c | 27 ++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 4eaa4aa86ecd..2d22b7bb11ca 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2937,16 +2937,33 @@ err: return false; } +static u16 intel_sdvo_filter_output_flags(u16 flags) +{ + flags &= SDVO_OUTPUT_MASK; + + /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/ + if (!(flags & SDVO_OUTPUT_TMDS0)) + flags &= ~SDVO_OUTPUT_TMDS1; + + if (!(flags & SDVO_OUTPUT_RGB0)) + flags &= ~SDVO_OUTPUT_RGB1; + + if (!(flags & SDVO_OUTPUT_LVDS0)) + flags &= ~SDVO_OUTPUT_LVDS1; + + return flags; +} + static bool intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) { - /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/ + flags = intel_sdvo_filter_output_flags(flags); if (flags & SDVO_OUTPUT_TMDS0) if (!intel_sdvo_dvi_init(intel_sdvo, 0)) return false; - if ((flags & SDVO_TMDS_MASK) == SDVO_TMDS_MASK) + if (flags & SDVO_OUTPUT_TMDS1) if (!intel_sdvo_dvi_init(intel_sdvo, 1)) return false; @@ -2967,7 +2984,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) if (!intel_sdvo_analog_init(intel_sdvo, 0)) return false; - if ((flags & SDVO_RGB_MASK) == SDVO_RGB_MASK) + if (flags & SDVO_OUTPUT_RGB1) if (!intel_sdvo_analog_init(intel_sdvo, 1)) return false; @@ -2975,11 +2992,11 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) if (!intel_sdvo_lvds_init(intel_sdvo, 0)) return false; - if ((flags & 
SDVO_LVDS_MASK) == SDVO_LVDS_MASK) + if (flags & SDVO_OUTPUT_LVDS1) if (!intel_sdvo_lvds_init(intel_sdvo, 1)) return false; - if ((flags & SDVO_OUTPUT_MASK) == 0) { + if (flags == 0) { unsigned char bytes[2]; intel_sdvo->controlled_output = 0; From a6c57adec567f3833225cb7b76b84ca73e5932d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2022 13:11:28 +0300 Subject: [PATCH 0676/1477] drm/i915/sdvo: Setup DDC fully before output init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e79762512120f11c51317570519a1553c70805d8 upstream. Call intel_sdvo_select_ddc_bus() before initializing any of the outputs. And before that is functional (assuming no VBT) we have to set up the controlled_outputs thing. Otherwise DDC won't be functional during the output init but LVDS really needs it for the fixed mode setup. Note that the whole multi output support still looks very bogus, and more work will be needed to make it correct. But for now this should at least fix the LVDS EDID fixed mode setup. 
Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7301 Fixes: aa2b88074a56 ("drm/i915/sdvo: Fix multi function encoder stuff") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221026101134.20865-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 64b7b557dc8a96d9cfed6aedbf81de2df80c025d) Signed-off-by: Tvrtko Ursulin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_sdvo.c | 31 +++++++++-------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 2d22b7bb11ca..58f8fb7c8799 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2760,13 +2760,10 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) if (!intel_sdvo_connector) return false; - if (device == 0) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_TMDS0; + if (device == 0) intel_sdvo_connector->output_flag = SDVO_OUTPUT_TMDS0; - } else if (device == 1) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_TMDS1; + else if (device == 1) intel_sdvo_connector->output_flag = SDVO_OUTPUT_TMDS1; - } intel_connector = &intel_sdvo_connector->base; connector = &intel_connector->base; @@ -2821,7 +2818,6 @@ intel_sdvo_tv_init(struct intel_sdvo *intel_sdvo, int type) encoder->encoder_type = DRM_MODE_ENCODER_TVDAC; connector->connector_type = DRM_MODE_CONNECTOR_SVIDEO; - intel_sdvo->controlled_output |= type; intel_sdvo_connector->output_flag = type; if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { @@ -2862,13 +2858,10 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, int device) encoder->encoder_type = DRM_MODE_ENCODER_DAC; connector->connector_type = DRM_MODE_CONNECTOR_VGA; - if (device == 0) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_RGB0; + if (device == 0) intel_sdvo_connector->output_flag = 
SDVO_OUTPUT_RGB0; - } else if (device == 1) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_RGB1; + else if (device == 1) intel_sdvo_connector->output_flag = SDVO_OUTPUT_RGB1; - } if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { kfree(intel_sdvo_connector); @@ -2898,13 +2891,10 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) encoder->encoder_type = DRM_MODE_ENCODER_LVDS; connector->connector_type = DRM_MODE_CONNECTOR_LVDS; - if (device == 0) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_LVDS0; + if (device == 0) intel_sdvo_connector->output_flag = SDVO_OUTPUT_LVDS0; - } else if (device == 1) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_LVDS1; + else if (device == 1) intel_sdvo_connector->output_flag = SDVO_OUTPUT_LVDS1; - } if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { kfree(intel_sdvo_connector); @@ -2957,8 +2947,14 @@ static u16 intel_sdvo_filter_output_flags(u16 flags) static bool intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) { + struct drm_i915_private *i915 = to_i915(intel_sdvo->base.base.dev); + flags = intel_sdvo_filter_output_flags(flags); + intel_sdvo->controlled_output = flags; + + intel_sdvo_select_ddc_bus(i915, intel_sdvo); + if (flags & SDVO_OUTPUT_TMDS0) if (!intel_sdvo_dvi_init(intel_sdvo, 0)) return false; @@ -2999,7 +2995,6 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) if (flags == 0) { unsigned char bytes[2]; - intel_sdvo->controlled_output = 0; memcpy(bytes, &intel_sdvo->caps.output_flags, 2); DRM_DEBUG_KMS("%s: Unknown SDVO output type (0x%02x%02x)\n", SDVO_NAME(intel_sdvo), @@ -3411,8 +3406,6 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, */ intel_sdvo->base.cloneable = 0; - intel_sdvo_select_ddc_bus(dev_priv, intel_sdvo); - /* Set the input timing to the screen. Assume always input 0. 
*/ if (!intel_sdvo_set_target_input(intel_sdvo)) goto err_output; From c6678c8f4f3f8383fe2dff3455de3d504382638f Mon Sep 17 00:00:00 2001 From: Dokyung Song Date: Fri, 21 Oct 2022 15:13:59 +0900 Subject: [PATCH 0677/1477] wifi: brcmfmac: Fix potential buffer overflow in brcmf_fweh_event_worker() commit 6788ba8aed4e28e90f72d68a9d794e34eac17295 upstream. This patch fixes an intra-object buffer overflow in brcmfmac that occurs when the device provides a 'bsscfgidx' equal to or greater than the buffer size. The patch adds a check that leads to a safe failure if that is the case. This fixes CVE-2022-3628. UBSAN: array-index-out-of-bounds in drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 52 is out of range for type 'brcmf_if *[16]' CPU: 0 PID: 1898 Comm: kworker/0:2 Tainted: G O 5.14.0+ #132 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Workqueue: events brcmf_fweh_event_worker Call Trace: dump_stack_lvl+0x57/0x7d ubsan_epilogue+0x5/0x40 __ubsan_handle_out_of_bounds+0x69/0x80 ? memcpy+0x39/0x60 brcmf_fweh_event_worker+0xae1/0xc00 ? brcmf_fweh_call_event_handler.isra.0+0x100/0x100 ? rcu_read_lock_sched_held+0xa1/0xd0 ? rcu_read_lock_bh_held+0xb0/0xb0 ? lockdep_hardirqs_on_prepare+0x273/0x3e0 process_one_work+0x873/0x13e0 ? lock_release+0x640/0x640 ? pwq_dec_nr_in_flight+0x320/0x320 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x8b/0xd10 ? __kthread_parkme+0xd9/0x1d0 ? process_one_work+0x13e0/0x13e0 kthread+0x379/0x450 ? _raw_spin_unlock_irq+0x24/0x30 ? 
set_kthread_struct+0x100/0x100 ret_from_fork+0x1f/0x30 ================================================================================ general protection fault, probably for non-canonical address 0xe5601c0020023fff: 0000 [#1] SMP KASAN KASAN: maybe wild-memory-access in range [0x2b0100010011fff8-0x2b0100010011ffff] CPU: 0 PID: 1898 Comm: kworker/0:2 Tainted: G O 5.14.0+ #132 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Workqueue: events brcmf_fweh_event_worker RIP: 0010:brcmf_fweh_call_event_handler.isra.0+0x42/0x100 Code: 89 f5 53 48 89 fb 48 83 ec 08 e8 79 0b 38 fe 48 85 ed 74 7e e8 6f 0b 38 fe 48 89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 8b 00 00 00 4c 8b 7d 00 44 89 e0 48 ba 00 00 00 RSP: 0018:ffffc9000259fbd8 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffff888115d8cd50 RCX: 0000000000000000 RDX: 0560200020023fff RSI: ffffffff8304bc91 RDI: ffff888115d8cd50 RBP: 2b0100010011ffff R08: ffff888112340050 R09: ffffed1023549809 R10: ffff88811aa4c047 R11: ffffed1023549808 R12: 0000000000000045 R13: ffffc9000259fca0 R14: ffff888112340050 R15: ffff888112340000 FS: 0000000000000000(0000) GS:ffff88811aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000004053ccc0 CR3: 0000000112740000 CR4: 0000000000750ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: brcmf_fweh_event_worker+0x117/0xc00 ? brcmf_fweh_call_event_handler.isra.0+0x100/0x100 ? rcu_read_lock_sched_held+0xa1/0xd0 ? rcu_read_lock_bh_held+0xb0/0xb0 ? lockdep_hardirqs_on_prepare+0x273/0x3e0 process_one_work+0x873/0x13e0 ? lock_release+0x640/0x640 ? pwq_dec_nr_in_flight+0x320/0x320 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x8b/0xd10 ? __kthread_parkme+0xd9/0x1d0 ? process_one_work+0x13e0/0x13e0 kthread+0x379/0x450 ? _raw_spin_unlock_irq+0x24/0x30 ? 
set_kthread_struct+0x100/0x100 ret_from_fork+0x1f/0x30 Modules linked in: 88XXau(O) 88x2bu(O) ---[ end trace 41d302138f3ff55a ]--- RIP: 0010:brcmf_fweh_call_event_handler.isra.0+0x42/0x100 Code: 89 f5 53 48 89 fb 48 83 ec 08 e8 79 0b 38 fe 48 85 ed 74 7e e8 6f 0b 38 fe 48 89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 8b 00 00 00 4c 8b 7d 00 44 89 e0 48 ba 00 00 00 RSP: 0018:ffffc9000259fbd8 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffff888115d8cd50 RCX: 0000000000000000 RDX: 0560200020023fff RSI: ffffffff8304bc91 RDI: ffff888115d8cd50 RBP: 2b0100010011ffff R08: ffff888112340050 R09: ffffed1023549809 R10: ffff88811aa4c047 R11: ffffed1023549808 R12: 0000000000000045 R13: ffffc9000259fca0 R14: ffff888112340050 R15: ffff888112340000 FS: 0000000000000000(0000) GS:ffff88811aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000004053ccc0 CR3: 0000000112740000 CR4: 0000000000750ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Reported-by: Dokyung Song Reported-by: Jisoo Jang Reported-by: Minsuk Kang Reviewed-by: Arend van Spriel Cc: Signed-off-by: Dokyung Song Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221021061359.GA550858@laguna Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 430d2cca98b3..1285d3685c4f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c @@ -228,6 +228,10 @@ static void brcmf_fweh_event_worker(struct work_struct *work) brcmf_fweh_event_name(event->code), event->code, event->emsg.ifidx, event->emsg.bsscfgidx, event->emsg.addr); + if 
(event->emsg.bsscfgidx >= BRCMF_MAX_IFS) { + bphy_err(drvr, "invalid bsscfg index: %u\n", event->emsg.bsscfgidx); + goto event_free; + } /* convert event message */ emsg_be = &event->emsg; From bf506e366da4b6aa950852cd4d320538a9f73e8e Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 11 Sep 2021 10:40:08 +0300 Subject: [PATCH 0678/1477] ipc: remove memcg accounting for sops objects in do_semtimedop() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6a4746ba06191e23d30230738e94334b26590a8a upstream. Linus proposes to revert an accounting for sops objects in do_semtimedop() because it's really just a temporary buffer for a single semtimedop() system call. This object can consume up to 2 pages, syscall is sleeping one, size and duration can be controlled by user, and this allocation can be repeated by many thread at the same time. However Shakeel Butt pointed that there are much more popular objects with the same life time and similar memory consumption, the accounting of which was decided to be rejected for performance reasons. Considering at least 2 pages for task_struct and 2 pages for the kernel stack, a back of the envelope calculation gives a footprint amplification of <1.5 so this temporal buffer can be safely ignored. The factor would IMO be interesting if it was >> 2 (from the PoV of excessive (ab)use, fine-grained accounting seems to be currently unfeasible due to performance impact). 
Link: https://lore.kernel.org/lkml/90e254df-0dfe-f080-011e-b7c53ee7fd20@virtuozzo.com/ Fixes: 18319498fdd4 ("memcg: enable accounting of ipc resources") Signed-off-by: Vasily Averin Acked-by: Michal Hocko Reviewed-by: Michal Koutný Acked-by: Shakeel Butt Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/sem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index d3b9b73cd9ca..2cb6515ef1dd 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2001,8 +2001,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, if (nsops > ns->sc_semopm) return -E2BIG; if (nsops > SEMOPM_FAST) { - sops = kvmalloc_array(nsops, sizeof(*sops), - GFP_KERNEL_ACCOUNT); + sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); if (sops == NULL) return -ENOMEM; } From f5b40c0eb9ea3d8233b9a2e9af6784c81204d454 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Nov 2022 18:14:30 +0100 Subject: [PATCH 0679/1477] Linux 5.10.154 Link: https://lore.kernel.org/r/20221108133340.718216105@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Salvatore Bonaccorso Tested-by: Hulk Robot Link: https://lore.kernel.org/r/20221109082223.141145957@linuxfoundation.org Tested-by: Rudi Heitbaum Tested-by: Jon Hunter Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Tested-by: Florian Fainelli Tested-by: Pavel Machek (CIP) Tested-by: Allen Pais Tested-by: Shuah Khan Tested-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d1cd7539105d..43fecb404581 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 153 +SUBLEVEL = 154 EXTRAVERSION = NAME = Dare mighty things From d2bc3376cd310697a8eafd86cd9867b9e2a40d21 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 30 Oct 2022 09:33:57 +0100 Subject: [PATCH 0680/1477] Revert 
"serial: 8250: Fix restoring termios speed after suspend" This reverts commit 8f740c11d89157bd74849ae388288075e05e2a34 which is commit 379a33786d489ab81885ff0b3935cfeb36137fea upstream. It breaks the build due to a dependant upstream patch being reverted earlier in the Android tree because of ABI breakages and it not being needed at all. So it is safe to remove here as well. Signed-off-by: Greg Kroah-Hartman Change-Id: Ibc77017d0655165eb6962bf722c7cbbd8cc17480 --- drivers/tty/serial/8250/8250_port.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 71d143c00248..3caa4aa193f6 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -3288,13 +3288,8 @@ static void serial8250_console_restore(struct uart_8250_port *up) unsigned int baud, quot, frac = 0; termios.c_cflag = port->cons->cflag; - termios.c_ispeed = port->cons->ispeed; - termios.c_ospeed = port->cons->ospeed; - if (port->state->port.tty && termios.c_cflag == 0) { + if (port->state->port.tty && termios.c_cflag == 0) termios.c_cflag = port->state->port.tty->termios.c_cflag; - termios.c_ispeed = port->state->port.tty->termios.c_ispeed; - termios.c_ospeed = port->state->port.tty->termios.c_ospeed; - } baud = serial8250_get_baud_rate(port, &termios, NULL); quot = serial8250_get_divisor(port, baud, &frac); From 7bcea6c5c90ac60c60ea8a727d0b18871b776afc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Nov 2022 02:31:52 +0100 Subject: [PATCH 0681/1477] ANDROID: gki_defconfig: remove CONFIG_INIT_STACK_ALL_ZERO=y Thanks to commit bdcb1d7cf285 ("hardening: Avoid harmless Clang option under CONFIG_INIT_STACK_ALL_ZERO"), this is now the default, no need to specify it explicitly. So remove it from the gki_defconfig files so that the build works properly again. 
Fixes: bdcb1d7cf285 ("hardening: Avoid harmless Clang option under CONFIG_INIT_STACK_ALL_ZERO") Signed-off-by: Greg Kroah-Hartman Change-Id: I32715acc4fcdd98f6ab05fa698571cf3e744db41 --- arch/arm64/configs/gki_defconfig | 1 - arch/x86/configs/gki_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index da8ab23b9ce5..cb0cf189b644 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -647,7 +647,6 @@ CONFIG_HARDENED_USERCOPY=y CONFIG_STATIC_USERMODEHELPER=y CONFIG_STATIC_USERMODEHELPER_PATH="" CONFIG_SECURITY_SELINUX=y -CONFIG_INIT_STACK_ALL_ZERO=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y CONFIG_CRYPTO_CHACHA20POLY1305=y CONFIG_CRYPTO_ADIANTUM=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 14ea69c0d417..9d72cd46bc3b 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -580,7 +580,6 @@ CONFIG_HARDENED_USERCOPY=y CONFIG_STATIC_USERMODEHELPER=y CONFIG_STATIC_USERMODEHELPER_PATH="" CONFIG_SECURITY_SELINUX=y -CONFIG_INIT_STACK_ALL_ZERO=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y CONFIG_CRYPTO_CHACHA20POLY1305=y CONFIG_CRYPTO_ADIANTUM=y From 76eba54f0ddfb580240f150b90b87d57fb3924e3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Oct 2022 17:18:58 +0200 Subject: [PATCH 0682/1477] fuse: fix readdir cache race [ Upstream commit 9fa248c65bdbf5af0a2f74dd38575acfc8dfd2bf ] There's a race in fuse's readdir cache that can result in an uninitialized page being read. The page lock is supposed to prevent this from happening but in the following case it doesn't: Two fuse_add_dirent_to_cache() start out and get the same parameters (size=0,offset=0). One of them wins the race to create and lock the page, after which it fills in data, sets rdc.size and unlocks the page. In the meantime the page gets evicted from the cache before the other instance gets to run.
That one also creates the page, but finds the size to be mismatched, bails out and leaves the uninitialized page in the cache. Fix by marking a filled page uptodate and ignoring non-uptodate pages. Reported-by: Frank Sorenson Fixes: 5d7bc7e8680c ("fuse: allow using readdir cache") Cc: # v4.20 Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/fuse/readdir.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index bc267832310c..d5294e663df5 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -77,8 +77,10 @@ static void fuse_add_dirent_to_cache(struct file *file, goto unlock; addr = kmap_atomic(page); - if (!offset) + if (!offset) { clear_page(addr); + SetPageUptodate(page); + } memcpy(addr + offset, dirent, reclen); kunmap_atomic(addr); fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen; @@ -516,6 +518,12 @@ retry_locked: page = find_get_page_flags(file->f_mapping, index, FGP_ACCESSED | FGP_LOCK); + /* Page gone missing, then re-added to cache, but not initialized? */ + if (page && !PageUptodate(page)) { + unlock_page(page); + put_page(page); + page = NULL; + } spin_lock(&fi->rdc.lock); if (!page) { /* From 925bf1ba760482591218ace2c8fe135155da2710 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 9 Sep 2022 11:20:23 +0200 Subject: [PATCH 0683/1477] hwspinlock: qcom: correct MMIO max register for newer SoCs [ Upstream commit 90cb380f9ceb811059340d06ff5fd0c0e93ecbe1 ] Newer ARMv8 Qualcomm SoCs using 0x1000 register stride have maximum register 0x20000 (32 mutexes * 0x1000). 
Fixes: 7a1e6fb1c606 ("hwspinlock: qcom: Allow mmio usage in addition to syscon") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220909092035.223915-4-krzysztof.kozlowski@linaro.org Signed-off-by: Sasha Levin --- drivers/hwspinlock/qcom_hwspinlock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwspinlock/qcom_hwspinlock.c b/drivers/hwspinlock/qcom_hwspinlock.c index 364710966665..e49914664863 100644 --- a/drivers/hwspinlock/qcom_hwspinlock.c +++ b/drivers/hwspinlock/qcom_hwspinlock.c @@ -105,7 +105,7 @@ static const struct regmap_config tcsr_mutex_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0x40000, + .max_register = 0x20000, .fast_io = true, }; From 72ea2fc29962f3bb16eba80a2f0c88ffa6bb9e08 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2022 12:25:06 +0300 Subject: [PATCH 0684/1477] phy: stm32: fix an error code in probe [ Upstream commit ca1c73628f5bd0c1ef6e46073cc3be2450605b06 ] If "index > usbphyc->nphys" is true then this returns success but it should return -EINVAL. 
Fixes: 94c358da3a05 ("phy: stm32: add support for STM32 USB PHY Controller (USBPHYC)") Signed-off-by: Dan Carpenter Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/Y0kq8j6S+5nDdMpr@kili Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/st/phy-stm32-usbphyc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index 2b3639cba51a..03fc567e9f18 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -393,6 +393,8 @@ static int stm32_usbphyc_probe(struct platform_device *pdev) ret = of_property_read_u32(child, "reg", &index); if (ret || index > usbphyc->nphys) { dev_err(&phy->dev, "invalid reg property: %d\n", ret); + if (!ret) + ret = -EINVAL; goto put_child; } From 914cb94e738ba6ebb482d61567fb50bf700e4f19 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Oct 2022 19:41:51 +0200 Subject: [PATCH 0685/1477] wifi: cfg80211: silence a sparse RCU warning [ Upstream commit 03c0ad4b06c3566de624b4f4b78ac1a5d1e4c8e7 ] All we're going to do with this pointer is assign it to another __rcu pointer, but sparse can't see that, so use rcu_access_pointer() to silence the warning here. 
Fixes: c90b93b5b782 ("wifi: cfg80211: update hidden BSSes to avoid WARN_ON") Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/scan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 22d169923261..15119c49c093 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1669,7 +1669,9 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, if (old == rcu_access_pointer(known->pub.ies)) rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies); - cfg80211_update_hidden_bsses(known, new->pub.beacon_ies, old); + cfg80211_update_hidden_bsses(known, + rcu_access_pointer(new->pub.beacon_ies), + old); if (old) kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); From e1e12180321f416d83444f2cdc9259e0f5093d35 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 20 Oct 2022 13:40:40 +0200 Subject: [PATCH 0686/1477] wifi: cfg80211: fix memory leak in query_regdb_file() [ Upstream commit 57b962e627ec0ae53d4d16d7bd1033e27e67677a ] In the function query_regdb_file() the alpha2 parameter is duplicated using kmemdup() and subsequently freed in regdb_fw_cb(). However, request_firmware_nowait() can fail without calling regdb_fw_cb() and thus leak memory. 
Fixes: 007f6c5e6eb4 ("cfg80211: support loading regulatory database as firmware file") Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index fd848609e656..a1e64d967bd3 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1064,6 +1064,8 @@ MODULE_FIRMWARE("regulatory.db"); static int query_regdb_file(const char *alpha2) { + int err; + ASSERT_RTNL(); if (regdb) @@ -1073,9 +1075,13 @@ static int query_regdb_file(const char *alpha2) if (!alpha2) return -ENOMEM; - return request_firmware_nowait(THIS_MODULE, true, "regulatory.db", - &reg_pdev->dev, GFP_KERNEL, - (void *)alpha2, regdb_fw_cb); + err = request_firmware_nowait(THIS_MODULE, true, "regulatory.db", + &reg_pdev->dev, GFP_KERNEL, + (void *)alpha2, regdb_fw_cb); + if (err) + kfree(alpha2); + + return err; } int reg_reload_regdb(void) From cc21dc48a78cc9e5af9a4d039cd456446a6e73ff Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 1 Nov 2022 09:31:36 +0800 Subject: [PATCH 0687/1477] bpf, sockmap: Fix the sk->sk_forward_alloc warning of sk_stream_kill_queues [ Upstream commit 8ec95b94716a1e4d126edc3fb2bc426a717e2dba ] When running `test_sockmap` selftests, the following warning appears: WARNING: CPU: 2 PID: 197 at net/core/stream.c:205 sk_stream_kill_queues+0xd3/0xf0 Call Trace: inet_csk_destroy_sock+0x55/0x110 tcp_rcv_state_process+0xd28/0x1380 ?
tcp_v4_do_rcv+0x77/0x2c0 tcp_v4_do_rcv+0x77/0x2c0 __release_sock+0x106/0x130 __tcp_close+0x1a7/0x4e0 tcp_close+0x20/0x70 inet_release+0x3c/0x80 __sock_release+0x3a/0xb0 sock_close+0x14/0x20 __fput+0xa3/0x260 task_work_run+0x59/0xb0 exit_to_user_mode_prepare+0x1b3/0x1c0 syscall_exit_to_user_mode+0x19/0x50 do_syscall_64+0x48/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae The root cause is in commit 84472b436e76 ("bpf, sockmap: Fix more uncharged while msg has more_data"), where I used msg->sg.size to replace the tosend, causing breakage: if (msg->apply_bytes && msg->apply_bytes < tosend) tosend = psock->apply_bytes; Fixes: 84472b436e76 ("bpf, sockmap: Fix more uncharged while msg has more_data") Reported-by: Jakub Sitnicki Signed-off-by: Wang Yufen Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/1667266296-8794-1-git-send-email-wangyufen@huawei.com Signed-off-by: Sasha Levin --- net/ipv4/tcp_bpf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index eaf2308c355a..809ee0f32d59 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -315,7 +315,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, { bool cork = false, enospc = sk_msg_full(msg); struct sock *sk_redir; - u32 tosend, delta = 0; + u32 tosend, origsize, sent, delta = 0; u32 eval = __SK_NONE; int ret; @@ -370,10 +370,12 @@ more_data: cork = true; psock->cork = NULL; } - sk_msg_return(sk, msg, msg->sg.size); + sk_msg_return(sk, msg, tosend); release_sock(sk); + origsize = msg->sg.size; ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); + sent = origsize - msg->sg.size; if (eval == __SK_REDIRECT) sock_put(sk_redir); @@ -412,7 +414,7 @@ more_data: msg->sg.data[msg->sg.start].page_link && msg->sg.data[msg->sg.start].length) { if (eval == __SK_REDIRECT) - sk_mem_charge(sk, msg->sg.size); + sk_mem_charge(sk, tosend - sent); goto more_data; } } From
8c80b2fca4112d724dde477aed13f7b0510a2792 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Wed, 2 Nov 2022 16:40:34 +0800 Subject: [PATCH 0688/1477] bpftool: Fix NULL pointer dereference when pin {PROG, MAP, LINK} without FILE [ Upstream commit 34de8e6e0e1f66e431abf4123934a2581cb5f133 ] When using bpftool to pin {PROG, MAP, LINK} without FILE, segmentation fault will occur. The reason is that the lack of FILE will cause strlen to trigger NULL pointer dereference. The corresponding stacktrace is shown below: do_pin do_pin_any do_pin_fd mount_bpffs_for_pin strlen(name) <- NULL pointer dereference Fix it by adding validation to the common process. Fixes: 75a1e792c335 ("tools: bpftool: Allow all prog/map handles for pinning objects") Signed-off-by: Pu Lehui Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20221102084034.3342995-1-pulehui@huaweicloud.com Signed-off-by: Sasha Levin --- tools/bpf/bpftool/common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 6ebf2b215ef4..eefa2b34e641 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -271,6 +271,9 @@ int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***)) int err; int fd; + if (!REQ_ARGS(3)) + return -EINVAL; + fd = get_fd(&argc, &argv); if (fd < 0) return fd; From 8597b59e3d22b27849bd3e4f92a3d466774bfb04 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 28 Oct 2022 21:40:43 +0800 Subject: [PATCH 0689/1477] HID: hyperv: fix possible memory leak in mousevsc_probe() [ Upstream commit b5bcb94b0954a026bbd671741fdb00e7141f9c91 ] If hid_add_device() returns error, it should call hid_destroy_device() to free hid_dev which is allocated in hid_allocate_device().
Fixes: 74c4fb058083 ("HID: hv_mouse: Properly add the hid device") Signed-off-by: Yang Yingliang Reviewed-by: Wei Liu Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 978ee2aab2d4..b7704dd6809d 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -498,7 +498,7 @@ static int mousevsc_probe(struct hv_device *device, ret = hid_add_device(hid_dev); if (ret) - goto probe_err1; + goto probe_err2; ret = hid_parse(hid_dev); From 95b6ec733752b31bfd166c4609d2c1b5cdde9b47 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 10 Nov 2020 19:12:11 -0800 Subject: [PATCH 0690/1477] bpf: Support for pointers beyond pkt_end. [ Upstream commit 6d94e741a8ff818e5518da8257f5ca0aaed1f269 ] This patch adds the verifier support to recognize inlined branch conditions. The LLVM knows that the branch evaluates to the same value, but the verifier couldn't track it. Hence causing valid programs to be rejected. The potential LLVM workaround: https://reviews.llvm.org/D87428 can have undesired side effects, since LLVM doesn't know that skb->data/data_end are being compared. LLVM has to introduce extra boolean variable and use inline_asm trick to force easier for the verifier assembly. Instead teach the verifier to recognize that r1 = skb->data; r1 += 10; r2 = skb->data_end; if (r1 > r2) { here r1 points beyond packet_end and subsequent if (r1 > r2) // always evaluates to "true". 
} Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Tested-by: Jiri Olsa Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201111031213.25109-2-alexei.starovoitov@gmail.com Stable-dep-of: f1db20814af5 ("bpf: Fix wrong reg type conversion in release_reference()") Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 2 +- kernel/bpf/verifier.c | 129 +++++++++++++++++++++++++++++------ 2 files changed, 108 insertions(+), 23 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 391bc1480dfb..f49165f9229c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -45,7 +45,7 @@ struct bpf_reg_state { enum bpf_reg_type type; union { /* valid when type == PTR_TO_PACKET */ - u16 range; + int range; /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e4dcc23b52c0..510a54471f13 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2978,7 +2978,9 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, regno); return -EACCES; } - err = __check_mem_access(env, regno, off, size, reg->range, + + err = reg->range < 0 ? -EINVAL : + __check_mem_access(env, regno, off, size, reg->range, zero_size_allowed); if (err) { verbose(env, "R%d offset is outside of the packet\n", regno); @@ -5018,6 +5020,32 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) __clear_all_pkt_pointers(env, vstate->frame[i]); } +enum { + AT_PKT_END = -1, + BEYOND_PKT_END = -2, +}; + +static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open) +{ + struct bpf_func_state *state = vstate->frame[vstate->curframe]; + struct bpf_reg_state *reg = &state->regs[regn]; + + if (reg->type != PTR_TO_PACKET) + /* PTR_TO_PACKET_META is not supported yet */ + return; + + /* The 'reg' is pkt > pkt_end or pkt >= pkt_end. 
+ * How far beyond pkt_end it goes is unknown. + * if (!range_open) it's the case of pkt >= pkt_end + * if (range_open) it's the case of pkt > pkt_end + * hence this pointer is at least 1 byte bigger than pkt_end + */ + if (range_open) + reg->range = BEYOND_PKT_END; + else + reg->range = AT_PKT_END; +} + static void release_reg_references(struct bpf_verifier_env *env, struct bpf_func_state *state, int ref_obj_id) @@ -7193,7 +7221,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) static void __find_good_pkt_pointers(struct bpf_func_state *state, struct bpf_reg_state *dst_reg, - enum bpf_reg_type type, u16 new_range) + enum bpf_reg_type type, int new_range) { struct bpf_reg_state *reg; int i; @@ -7218,8 +7246,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, enum bpf_reg_type type, bool range_right_open) { - u16 new_range; - int i; + int new_range, i; if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@ -7470,6 +7497,67 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, return is_branch64_taken(reg, val, opcode); } +static int flip_opcode(u32 opcode) +{ + /* How can we transform "a <op> b" into "b <op> a"?
*/ + static const u8 opcode_flip[16] = { + /* these stay the same */ + [BPF_JEQ >> 4] = BPF_JEQ, + [BPF_JNE >> 4] = BPF_JNE, + [BPF_JSET >> 4] = BPF_JSET, + /* these swap "lesser" and "greater" (L and G in the opcodes) */ + [BPF_JGE >> 4] = BPF_JLE, + [BPF_JGT >> 4] = BPF_JLT, + [BPF_JLE >> 4] = BPF_JGE, + [BPF_JLT >> 4] = BPF_JGT, + [BPF_JSGE >> 4] = BPF_JSLE, + [BPF_JSGT >> 4] = BPF_JSLT, + [BPF_JSLE >> 4] = BPF_JSGE, + [BPF_JSLT >> 4] = BPF_JSGT + }; + return opcode_flip[opcode >> 4]; +} + +static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg, + struct bpf_reg_state *src_reg, + u8 opcode) +{ + struct bpf_reg_state *pkt; + + if (src_reg->type == PTR_TO_PACKET_END) { + pkt = dst_reg; + } else if (dst_reg->type == PTR_TO_PACKET_END) { + pkt = src_reg; + opcode = flip_opcode(opcode); + } else { + return -1; + } + + if (pkt->range >= 0) + return -1; + + switch (opcode) { + case BPF_JLE: + /* pkt <= pkt_end */ + fallthrough; + case BPF_JGT: + /* pkt > pkt_end */ + if (pkt->range == BEYOND_PKT_END) + /* pkt has at last one extra byte beyond pkt_end */ + return opcode == BPF_JGT; + break; + case BPF_JLT: + /* pkt < pkt_end */ + fallthrough; + case BPF_JGE: + /* pkt >= pkt_end */ + if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END) + return opcode == BPF_JGE; + break; + } + return -1; +} + /* Adjusts the register min/max values in the case that the dst_reg is the * variable register that we are working on, and src_reg is a constant or we're * simply doing a BPF_K check. @@ -7640,23 +7728,7 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, u64 val, u32 val32, u8 opcode, bool is_jmp32) { - /* How can we transform "a <op> b" into "b <op> a"?
*/ - static const u8 opcode_flip[16] = { - /* these stay the same */ - [BPF_JEQ >> 4] = BPF_JEQ, - [BPF_JNE >> 4] = BPF_JNE, - [BPF_JSET >> 4] = BPF_JSET, - /* these swap "lesser" and "greater" (L and G in the opcodes) */ - [BPF_JGE >> 4] = BPF_JLE, - [BPF_JGT >> 4] = BPF_JLT, - [BPF_JLE >> 4] = BPF_JGE, - [BPF_JLT >> 4] = BPF_JGT, - [BPF_JSGE >> 4] = BPF_JSLE, - [BPF_JSGT >> 4] = BPF_JSLT, - [BPF_JSLE >> 4] = BPF_JSGE, - [BPF_JSLT >> 4] = BPF_JSGT - }; - opcode = opcode_flip[opcode >> 4]; + opcode = flip_opcode(opcode); /* This uses zero as "not present in table"; luckily the zero opcode, * BPF_JA, can't get here. */ @@ -7825,6 +7897,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, false); + mark_pkt_end(other_branch, insn->dst_reg, true); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7832,6 +7905,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end > pkt_data', pkt_data > pkt_meta' */ find_good_pkt_pointers(other_branch, src_reg, src_reg->type, true); + mark_pkt_end(this_branch, insn->src_reg, false); } else { return false; } @@ -7844,6 +7918,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, true); + mark_pkt_end(this_branch, insn->dst_reg, false); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7851,6 +7926,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end < pkt_data', pkt_data > pkt_meta' */ find_good_pkt_pointers(this_branch, src_reg, src_reg->type, false); + mark_pkt_end(other_branch, insn->src_reg, true); } else { return false; } @@ -7863,6 +7939,7 @@ static bool 
try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, true); + mark_pkt_end(other_branch, insn->dst_reg, false); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7870,6 +7947,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */ find_good_pkt_pointers(other_branch, src_reg, src_reg->type, false); + mark_pkt_end(this_branch, insn->src_reg, true); } else { return false; } @@ -7882,6 +7960,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */ find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, false); + mark_pkt_end(this_branch, insn->dst_reg, true); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7889,6 +7968,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */ find_good_pkt_pointers(this_branch, src_reg, src_reg->type, true); + mark_pkt_end(other_branch, insn->src_reg, false); } else { return false; } @@ -7988,6 +8068,10 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, src_reg->var_off.value, opcode, is_jmp32); + } else if (reg_is_pkt_pointer_any(dst_reg) && + reg_is_pkt_pointer_any(src_reg) && + !is_jmp32) { + pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode); } if (pred >= 0) { @@ -7996,7 +8080,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, */ if (!__is_pointer_value(false, dst_reg)) err = mark_chain_precision(env, insn->dst_reg); - if (BPF_SRC(insn->code) == BPF_X && !err) + if (BPF_SRC(insn->code) == BPF_X && !err && + !__is_pointer_value(false, src_reg)) err = mark_chain_precision(env, insn->src_reg); if (err) return err; From 
9069db2579e92a1ba604dd85e5de5206ea4beb6d Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sun, 4 Sep 2022 22:41:28 +0200 Subject: [PATCH 0691/1477] bpf: Add helper macro bpf_for_each_reg_in_vstate [ Upstream commit b239da34203f49c40b5d656220c39647c3ff0b3c ] For a lot of use cases in future patches, we will want to modify the state of registers part of some same 'group' (e.g. same ref_obj_id). It won't just be limited to releasing reference state, but setting a type flag dynamically based on certain actions, etc. Hence, we need a way to easily pass a callback to the function that iterates over all registers in current bpf_verifier_state in all frames upto (and including) the curframe. While in C++ we would be able to easily use a lambda to pass state and the callback together, sadly we aren't using C++ in the kernel. The next best thing to avoid defining a function for each case seems like statement expressions in GNU C. The kernel already uses them heavily, hence they can passed to the macro in the style of a lambda. The statement expression will then be substituted in the for loop bodies. Variables __state and __reg are set to current bpf_func_state and reg for each invocation of the expression inside the passed in verifier state. Then, convert mark_ptr_or_null_regs, clear_all_pkt_pointers, release_reference, find_good_pkt_pointers, find_equal_scalars to use bpf_for_each_reg_in_vstate. 
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220904204145.3089-16-memxor@gmail.com Signed-off-by: Alexei Starovoitov Stable-dep-of: f1db20814af5 ("bpf: Fix wrong reg type conversion in release_reference()") Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 21 ++++++ kernel/bpf/verifier.c | 137 ++++++++--------------------------- 2 files changed, 50 insertions(+), 108 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f49165f9229c..4d37c69e76b1 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -290,6 +290,27 @@ struct bpf_verifier_state { iter < frame->allocated_stack / BPF_REG_SIZE; \ iter++, reg = bpf_get_spilled_reg(iter, frame)) +/* Invoke __expr over regsiters in __vst, setting __state and __reg */ +#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ + ({ \ + struct bpf_verifier_state *___vstate = __vst; \ + int ___i, ___j; \ + for (___i = 0; ___i <= ___vstate->curframe; ___i++) { \ + struct bpf_reg_state *___regs; \ + __state = ___vstate->frame[___i]; \ + ___regs = __state->regs; \ + for (___j = 0; ___j < MAX_BPF_REG; ___j++) { \ + __reg = &___regs[___j]; \ + (void)(__expr); \ + } \ + bpf_for_each_spilled_reg(___j, __state, __reg) { \ + if (!__reg) \ + continue; \ + (void)(__expr); \ + } \ + } \ + }) + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 510a54471f13..3a0f288f538c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4993,31 +4993,15 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id) /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] * are now invalid, so turn them into unknown SCALAR_VALUE. 
*/ -static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, - struct bpf_func_state *state) -{ - struct bpf_reg_state *regs = state->regs, *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - if (reg_is_pkt_pointer_any(&regs[i])) - mark_reg_unknown(env, regs, i); - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg_is_pkt_pointer_any(reg)) - __mark_reg_unknown(env, reg); - } -} - static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { - struct bpf_verifier_state *vstate = env->cur_state; - int i; + struct bpf_func_state *state; + struct bpf_reg_state *reg; - for (i = 0; i <= vstate->curframe; i++) - __clear_all_pkt_pointers(env, vstate->frame[i]); + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg_is_pkt_pointer_any(reg)) + __mark_reg_unknown(env, reg); + })); } enum { @@ -5046,41 +5030,24 @@ static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range reg->range = AT_PKT_END; } -static void release_reg_references(struct bpf_verifier_env *env, - struct bpf_func_state *state, - int ref_obj_id) -{ - struct bpf_reg_state *regs = state->regs, *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].ref_obj_id == ref_obj_id) - mark_reg_unknown(env, regs, i); - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->ref_obj_id == ref_obj_id) - __mark_reg_unknown(env, reg); - } -} - /* The pointer with the specified id has released its reference to kernel * resources. Identify all copies of the same pointer and clear the reference.
*/ static int release_reference(struct bpf_verifier_env *env, int ref_obj_id) { - struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state; + struct bpf_reg_state *reg; int err; - int i; err = release_reference_state(cur_func(env), ref_obj_id); if (err) return err; - for (i = 0; i <= vstate->curframe; i++) - release_reg_references(env, vstate->frame[i], ref_obj_id); + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg->ref_obj_id == ref_obj_id) + __mark_reg_unknown(env, reg); + })); return 0; } @@ -7219,34 +7186,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } -static void __find_good_pkt_pointers(struct bpf_func_state *state, - struct bpf_reg_state *dst_reg, - enum bpf_reg_type type, int new_range) -{ - struct bpf_reg_state *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) { - reg = &state->regs[i]; - if (reg->type == type && reg->id == dst_reg->id) - /* keep the maximum range already checked */ - reg->range = max(reg->range, new_range); - } - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->type == type && reg->id == dst_reg->id) - reg->range = max(reg->range, new_range); - } -} - static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg, enum bpf_reg_type type, bool range_right_open) { - int new_range, i; + struct bpf_func_state *state; + struct bpf_reg_state *reg; + int new_range; if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@ -7311,9 +7258,11 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, * the range won't allow anything. * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. 
*/ - for (i = 0; i <= vstate->curframe; i++) - __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, - new_range); + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + if (reg->type == type && reg->id == dst_reg->id) + /* keep the maximum range already checked */ + reg->range = max(reg->range, new_range); + })); } static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) @@ -7826,7 +7775,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, reg->ref_obj_id = 0; } else if (!reg_may_point_to_spin_lock(reg)) { /* For not-NULL ptr, reg->ref_obj_id will be reset - * in release_reg_references(). + * in release_reference(). * * reg->id is still used by spin_lock ptr. Other * than spin_lock ptr type, reg->id can be reset. @@ -7836,22 +7785,6 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } } -static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, - bool is_null) -{ - struct bpf_reg_state *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - mark_ptr_or_null_reg(state, reg, id, is_null); - } -} - /* The logic is similar to find_good_pkt_pointers(), both could eventually * be folded together at some point. */ @@ -7859,10 +7792,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null) { struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *regs = state->regs, *reg; u32 ref_obj_id = regs[regno].ref_obj_id; u32 id = regs[regno].id; - int i; if (ref_obj_id && ref_obj_id == id && is_null) /* regs[regno] is in the " == NULL" branch. 
@@ -7871,8 +7803,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, */ WARN_ON_ONCE(release_reference_state(state, id)); - for (i = 0; i <= vstate->curframe; i++) - __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + mark_ptr_or_null_reg(state, reg, id, is_null); + })); } static bool try_match_pkt_pointers(const struct bpf_insn *insn, @@ -7985,23 +7918,11 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate, { struct bpf_func_state *state; struct bpf_reg_state *reg; - int i, j; - for (i = 0; i <= vstate->curframe; i++) { - state = vstate->frame[i]; - for (j = 0; j < MAX_BPF_REG; j++) { - reg = &state->regs[j]; - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) - *reg = *known_reg; - } - - bpf_for_each_spilled_reg(j, state, reg) { - if (!reg) - continue; - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) - *reg = *known_reg; - } - } + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) + *reg = *known_reg; + })); } static int check_cond_jmp_op(struct bpf_verifier_env *env, From cedd4f01f67be94735f15123158f485028571037 Mon Sep 17 00:00:00 2001 From: Youlin Li Date: Thu, 3 Nov 2022 17:34:39 +0800 Subject: [PATCH 0692/1477] bpf: Fix wrong reg type conversion in release_reference() [ Upstream commit f1db20814af532f85e091231223e5e4818e8464b ] Some helper functions will allocate memory. To avoid memory leaks, the verifier requires the eBPF program to release these memories by calling the corresponding helper functions. When a resource is released, all pointer registers corresponding to the resource should be invalidated. The verifier use release_references() to do this job, by apply __mark_reg_unknown() to each relevant register. It will give these registers the type of SCALAR_VALUE. 
A register that will contain a pointer value at runtime but has the type SCALAR_VALUE may allow an unprivileged user to get a kernel pointer by storing this register into a map. Using __mark_reg_not_init() when allow_ptr_leaks is NOT set can mitigate this problem. Fixes: fd978bf7fd31 ("bpf: Add reference tracking to verifier") Signed-off-by: Youlin Li Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20221103093440.3161-1-liulin063@gmail.com Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3a0f288f538c..50364031eb4d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5045,8 +5045,12 @@ static int release_reference(struct bpf_verifier_env *env, return err; bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ - if (reg->ref_obj_id == ref_obj_id) - __mark_reg_unknown(env, reg); + if (reg->ref_obj_id == ref_obj_id) { + if (!env->allow_ptr_leaks) + __mark_reg_not_init(env, reg); + else + __mark_reg_unknown(env, reg); + } })); return 0; From 50868de7dc4e7f0fcadd6029f32bf4387c102ee6 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 2 Nov 2022 17:53:25 +0100 Subject: [PATCH 0693/1477] net: gso: fix panic on frag_list with mixed head alloc types [ Upstream commit 9e4b7a99a03aefd37ba7bb1f022c8efab5019165 ] Since commit 3dcbdb134f32 ("net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list"), it is allowed to change gso_size of a GRO packet. However, that commit assumes that "checking the first list_skb member suffices; i.e if either of the list_skb members have non head_frag head, then the first one has too". It turns out this assumption does not hold. We've seen BUG_ON being hit in skb_segment when skbs on the frag_list had differing head_frag with the vmxnet3 driver.
This happens because __netdev_alloc_skb and __napi_alloc_skb can return a skb that is page backed or kmalloced depending on the requested size. As a result, the last small skb in the GRO packet can be kmalloced. There are three different locations where this can be fixed: (1) We could check head_frag in GRO and not allow GROing skbs with different head_frag. However, that would lead to a performance regression on normal forward paths with unmodified gso_size, where !head_frag in the last packet is not a problem. (2) Set a flag in bpf_skb_net_grow and bpf_skb_net_shrink indicating that NETIF_F_SG is undesirable. That would need to eat a bit in sk_buff. Furthermore, that flag can be unset when all skbs on the frag_list are page backed. To retain good performance, bpf_skb_net_grow/shrink would have to walk the frag_list. (3) Walk the frag_list in skb_segment when determining whether NETIF_F_SG should be cleared. This of course slows things down. This patch implements (3). To limit the performance impact in skb_segment, the list is walked only for skbs with SKB_GSO_DODGY set that have gso_size changed. Normal paths thus will not hit it. We could check only the last skb but since we need to walk the whole list anyway, let's stay on the safe side.
Fixes: 3dcbdb134f32 ("net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list") Signed-off-by: Jiri Benc Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/e04426a6a91baf4d1081e1b478c82b5de25fdf21.1667407944.git.jbenc@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/skbuff.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7bdcdad58dc8..06169889b0ca 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3809,23 +3809,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int i = 0; int pos; - if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && - (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { - /* gso_size is untrusted, and we have a frag_list with a linear - * non head_frag head. - * - * (we assume checking the first list_skb member suffices; - * i.e if either of the list_skb members have non head_frag - * head, then the first one has too). - * - * If head_skb's headlen does not fit requested gso_size, it - * means that the frag_list members do NOT terminate on exact - * gso_size boundaries. Hence we cannot perform skb_frag_t page - * sharing. Therefore we must fallback to copying the frag_list - * skbs; we do so by disabling SG. - */ - if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) - features &= ~NETIF_F_SG; + if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) && + mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) { + struct sk_buff *check_skb; + + for (check_skb = list_skb; check_skb; check_skb = check_skb->next) { + if (skb_headlen(check_skb) && !check_skb->head_frag) { + /* gso_size is untrusted, and we have a frag_list with + * a linear non head_frag item. + * + * If head_skb's headlen does not fit requested gso_size, + * it means that the frag_list members do NOT terminate + * on exact gso_size boundaries. 
Hence we cannot perform + * skb_frag_t page sharing. Therefore we must fallback to + * copying the frag_list skbs; we do so by disabling SG. + */ + features &= ~NETIF_F_SG; + break; + } + } } __skb_push(head_skb, doffset); From 3b05d9073ae26c686d9d26d138164d1766cf470e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:13 +0100 Subject: [PATCH 0694/1477] macsec: delete new rxsc when offload fails [ Upstream commit 93a30947821c203d08865c4e17ea181c9668ce52 ] Currently we get an inconsistent state: - netlink returns the error to userspace - the RXSC is installed but not offloaded Then the device could get confused when we try to add an RXSA, because the RXSC isn't supposed to exist. Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 70c5905a916b..65e0af28c950 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1867,7 +1867,6 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) struct macsec_rx_sc *rx_sc; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct macsec_secy *secy; - bool was_active; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) @@ -1895,7 +1894,6 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(rx_sc); } - was_active = rx_sc->active; if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) rx_sc->active = !!nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); @@ -1922,7 +1920,8 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) return 0; cleanup: - rx_sc->active = was_active; + del_rx_sc(secy, sci); + free_rx_sc(rx_sc); rtnl_unlock(); return ret; } From 7f4456f0119b8014ce61c38912d74fa4fd24092c Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 
Nov 2022 22:33:14 +0100 Subject: [PATCH 0695/1477] macsec: fix secy->n_rx_sc accounting [ Upstream commit 73a4b31c9d11f98ae3bc5286d5382930adb0e9c7 ] secy->n_rx_sc is supposed to be the number of _active_ rxsc's within a secy. This is then used by macsec_send_sci to help decide if we should add the SCI to the header or not. This logic is currently broken when we create a new RXSC and turn it off at creation, as create_rx_sc always sets ->active to true (and immediately uses that to increment n_rx_sc), and only later macsec_add_rxsc sets rx_sc->active. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 65e0af28c950..a816fbbe23a7 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1390,7 +1390,8 @@ static struct macsec_rx_sc *del_rx_sc(struct macsec_secy *secy, sci_t sci) return NULL; } -static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci) +static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci, + bool active) { struct macsec_rx_sc *rx_sc; struct macsec_dev *macsec; @@ -1414,7 +1415,7 @@ static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci) } rx_sc->sci = sci; - rx_sc->active = true; + rx_sc->active = active; refcount_set(&rx_sc->refcnt, 1); secy = &macsec_priv(dev)->secy; @@ -1867,6 +1868,7 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) struct macsec_rx_sc *rx_sc; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct macsec_secy *secy; + bool active = true; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) @@ -1888,15 +1890,15 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) secy = &macsec_priv(dev)->secy; sci = 
nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); - rx_sc = create_rx_sc(dev, sci); + if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) + active = nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); + + rx_sc = create_rx_sc(dev, sci, active); if (IS_ERR(rx_sc)) { rtnl_unlock(); return PTR_ERR(rx_sc); } - if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) - rx_sc->active = !!nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); - if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; From 9dc7503bae33046c6301562f7e03d5352e6f8c50 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:15 +0100 Subject: [PATCH 0696/1477] macsec: fix detection of RXSCs when toggling offloading [ Upstream commit 80df4706357a5a06bbbc70273bf2611df1ceee04 ] macsec_is_configured incorrectly uses secy->n_rx_sc to check if some RXSCs exist. secy->n_rx_sc only counts the number of active RXSCs, but there can also be inactive SCs as well, which may be stored in the driver (in case we're disabling offloading), or would have to be pushed to the device (in case we're trying to enable offloading). As long as RXSCs active on creation and never turned off, the issue is not visible. Fixes: dcb780fb2795 ("net: macsec: add nla support for changing the offloading selection") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index a816fbbe23a7..69108c1db130 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2562,7 +2562,7 @@ static bool macsec_is_configured(struct macsec_dev *macsec) struct macsec_tx_sc *tx_sc = &secy->tx_sc; int i; - if (secy->n_rx_sc > 0) + if (secy->rx_sc) return true; for (i = 0; i < MACSEC_NUM_AN; i++) From adaa0f180de5236e086ddab6476c4364d922f1fd Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:16 +0100 Subject: [PATCH 0697/1477] macsec: clear encryption keys from the stack after setting up offload [ Upstream commit aaab73f8fba4fd38f4d2617440d541a1c334e819 ] macsec_add_rxsa and macsec_add_txsa copy the key to an on-stack offloading context to pass it to the drivers, but leave it there when they're done. Clear it with memzero_explicit as soon as it's not needed anymore. Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 69108c1db130..f84e3cc0d3ec 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1824,6 +1824,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) secy->key_len); err = macsec_offload(ops->mdo_add_rxsa, &ctx); + memzero_explicit(ctx.sa.key, secy->key_len); if (err) goto cleanup; } @@ -2066,6 +2067,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) secy->key_len); err = macsec_offload(ops->mdo_add_txsa, &ctx); + memzero_explicit(ctx.sa.key, secy->key_len); if (err) goto cleanup; } From 3401f964028ac941425b9b2c8ff8a022539ef44a Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Wed, 2 Nov 2022 17:41:19 +0800 Subject: [PATCH 0698/1477] net: tun: Fix memory leaks of napi_get_frags [ Upstream commit 1118b2049d77ca0b505775fc1a8d1909cf19a7ec ] kmemleak reports after running test_progs: unreferenced object 0xffff8881b1672dc0 (size 232): comm "test_progs", pid 394388, jiffies 4354712116 (age 841.975s) hex dump (first 32 bytes): e0 84 d7 a8 81 88 ff ff 80 2c 67 b1 81 88 ff ff .........,g..... 00 40 c5 9b 81 88 ff ff 00 00 00 00 00 00 00 00 .@.............. 
backtrace: [<00000000c8f01748>] napi_skb_cache_get+0xd4/0x150 [<0000000041c7fc09>] __napi_build_skb+0x15/0x50 [<00000000431c7079>] __napi_alloc_skb+0x26e/0x540 [<000000003ecfa30e>] napi_get_frags+0x59/0x140 [<0000000099b2199e>] tun_get_user+0x183d/0x3bb0 [tun] [<000000008a5adef0>] tun_chr_write_iter+0xc0/0x1b1 [tun] [<0000000049993ff4>] do_iter_readv_writev+0x19f/0x320 [<000000008f338ea2>] do_iter_write+0x135/0x630 [<000000008a3377a4>] vfs_writev+0x12e/0x440 [<00000000a6b5639a>] do_writev+0x104/0x280 [<00000000ccf065d8>] do_syscall_64+0x3b/0x90 [<00000000d776e329>] entry_SYSCALL_64_after_hwframe+0x63/0xcd The issue occurs in the following scenarios: tun_get_user() napi_gro_frags() napi_frags_finish() case GRO_NORMAL: gro_normal_one() list_add_tail(&skb->list, &napi->rx_list); <-- While napi->rx_count < READ_ONCE(gro_normal_batch), <-- gro_normal_list() is not called, napi->rx_list is not empty <-- not ask to complete the gro work, will cause memory leaks in <-- following tun_napi_del() ... tun_napi_del() netif_napi_del() __netif_napi_del() <-- &napi->rx_list is not empty, which caused memory leaks To fix, add napi_complete() after napi_gro_frags(). Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Wang Yufen Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 0c09f8e9d383..83662f616b67 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1996,6 +1996,7 @@ drop: local_bh_disable(); napi_gro_frags(&tfile->napi); + napi_complete(&tfile->napi); local_bh_enable(); mutex_unlock(&tfile->napi_mutex); } else if (tfile->napi_enabled) { From 38147073c96dce8c7e142ce0e5f305a420a729ba Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 3 Nov 2022 19:33:26 -0400 Subject: [PATCH 0699/1477] bnxt_en: Fix possible crash in bnxt_hwrm_set_coal() [ Upstream commit 6d81ea3765dfa6c8a20822613c81edad1c4a16a0 ] During the error recovery sequence, the rtnl_lock is not held for the entire duration and some datastructures may be freed during the sequence. Check for the BNXT_STATE_OPEN flag instead of netif_running() to ensure that the device is fully operational before proceeding to reconfigure the coalescing settings. This will fix a possible crash like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 10 PID: 181276 Comm: ethtool Kdump: loaded Tainted: G IOE --------- - - 4.18.0-348.el8.x86_64 #1 Hardware name: Dell Inc. 
PowerEdge R740/0F9N89, BIOS 2.3.10 08/15/2019 RIP: 0010:bnxt_hwrm_set_coal+0x1fb/0x2a0 [bnxt_en] Code: c2 66 83 4e 22 08 66 89 46 1c e8 10 cb 00 00 41 83 c6 01 44 39 b3 68 01 00 00 0f 8e a3 00 00 00 48 8b 93 c8 00 00 00 49 63 c6 <48> 8b 2c c2 48 8b 85 b8 02 00 00 48 85 c0 74 2e 48 8b 74 24 08 f6 RSP: 0018:ffffb11c8dcaba50 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8d168a8b0ac0 RCX: 00000000000000c5 RDX: 0000000000000000 RSI: ffff8d162f72c000 RDI: ffff8d168a8b0b28 RBP: 0000000000000000 R08: b6e1f68a12e9a7eb R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000037 R12: ffff8d168a8b109c R13: ffff8d168a8b10aa R14: 0000000000000000 R15: ffffffffc01ac4e0 FS: 00007f3852e4c740(0000) GS:ffff8d24c0080000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000041b3ee003 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ethnl_set_coalesce+0x3ce/0x4c0 genl_family_rcv_msg_doit.isra.15+0x10f/0x150 genl_family_rcv_msg+0xb3/0x160 ? coalesce_fill_reply+0x480/0x480 genl_rcv_msg+0x47/0x90 ? genl_family_rcv_msg+0x160/0x160 netlink_rcv_skb+0x4c/0x120 genl_rcv+0x24/0x40 netlink_unicast+0x196/0x230 netlink_sendmsg+0x204/0x3d0 sock_sendmsg+0x4c/0x50 __sys_sendto+0xee/0x160 ? syscall_trace_enter+0x1d3/0x2c0 ? 
__audit_syscall_exit+0x249/0x2a0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x1a0 entry_SYSCALL_64_after_hwframe+0x65/0xca RIP: 0033:0x7f38524163bb Fixes: 2151fe0830fd ("bnxt_en: Handle RESET_NOTIFY async event from firmware.") Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index f8f775619520..81b63d1c2391 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -125,7 +125,7 @@ static int bnxt_set_coalesce(struct net_device *dev, } reset_coalesce: - if (netif_running(dev)) { + if (test_bit(BNXT_STATE_OPEN, &bp->state)) { if (update_stats) { rc = bnxt_close_nic(bp, true, false); if (!rc) From e2c5ee3b628f72729c369611290524d662424e16 Mon Sep 17 00:00:00 2001 From: Alex Barba Date: Thu, 3 Nov 2022 19:33:27 -0400 Subject: [PATCH 0700/1477] bnxt_en: fix potentially incorrect return value for ndo_rx_flow_steer [ Upstream commit 02597d39145bb0aa81d04bf39b6a913ce9a9d465 ] In the bnxt_en driver ndo_rx_flow_steer returns '0' whenever an entry that we are attempting to steer is already found. This is not the correct behavior. The return code should be the value/index that corresponds to the entry. Returning zero all the time causes the RFS records to be incorrect unless entry '0' is the correct one. As flows migrate to different cores this can create entries that are not correct. 
Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reported-by: Akshay Navgire Signed-off-by: Alex Barba Signed-off-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b818d5f342d5..8311473d537b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12008,8 +12008,8 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, rcu_read_lock(); hlist_for_each_entry_rcu(fltr, head, hash) { if (bnxt_fltr_match(fltr, new_fltr)) { + rc = fltr->sw_id; rcu_read_unlock(); - rc = 0; goto err_free; } } From 8aae24b0ed767542e71159ba0756a1faea70a6b2 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 3 Nov 2022 14:28:30 -0400 Subject: [PATCH 0701/1477] net: fman: Unregister ethernet device on removal [ Upstream commit b7cbc6740bd6ad5d43345a2504f7e4beff0d709f ] When the mac device gets removed, it leaves behind the ethernet device. This will result in a segfault next time the ethernet device accesses mac_dev. Remove the ethernet device when we get removed to prevent this. This is not completely reversible, since some resources aren't cleaned up properly, but that can be addressed later. 
Fixes: 3933961682a3 ("fsl/fman: Add FMan MAC driver") Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20221103182831.2248833-1-sean.anderson@seco.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fman/mac.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 6eeccc11b76e..3312dc4083a0 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -884,12 +884,21 @@ _return: return err; } +static int mac_remove(struct platform_device *pdev) +{ + struct mac_device *mac_dev = platform_get_drvdata(pdev); + + platform_device_unregister(mac_dev->priv->eth_dev); + return 0; +} + static struct platform_driver mac_driver = { .driver = { .name = KBUILD_MODNAME, .of_match_table = mac_match, }, .probe = mac_probe, + .remove = mac_remove, }; builtin_platform_driver(mac_driver); From fcbd2b336834bd24e1d9454ad5737856470c10d7 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 19:25:36 +0800 Subject: [PATCH 0702/1477] capabilities: fix undefined behavior in bit shift for CAP_TO_MASK [ Upstream commit 46653972e3ea64f79e7f8ae3aa41a4d3fdb70a13 ] Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. The UBSAN warning calltrace like below: UBSAN: shift-out-of-bounds in security/commoncap.c:1252:2 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c cap_task_prctl+0x561/0x6f0 security_task_prctl+0x5a/0xb0 __x64_sys_prctl+0x61/0x8f0 do_syscall_64+0x58/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: e338d263a76a ("Add 64-bit capability support to the kernel") Signed-off-by: Gaosheng Cui Acked-by: Andrew G. 
Morgan Reviewed-by: Serge Hallyn Signed-off-by: Paul Moore Signed-off-by: Sasha Levin --- include/uapi/linux/capability.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 2ddb4226cd23..43a44538ec8d 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -427,7 +427,7 @@ struct vfs_ns_cap_data { */ #define CAP_TO_INDEX(x) ((x) >> 5) /* 1 << 5 == bits in __u32 */ -#define CAP_TO_MASK(x) (1 << ((x) & 31)) /* mask for indexed __u32 */ +#define CAP_TO_MASK(x) (1U << ((x) & 31)) /* mask for indexed __u32 */ #endif /* _UAPI_LINUX_CAPABILITY_H */ From a60cc64db72f9fa87b82fbda12fb11270599f8c4 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 1 Mar 2022 15:33:40 +0100 Subject: [PATCH 0703/1477] KVM: s390x: fix SCK locking [ Upstream commit c0573ba5c5a2244dc02060b1f374d4593c1d20b7 ] When handling the SCK instruction, the kvm lock is taken, even though the vcpu lock is already being held. The normal locking order is kvm lock first and then vcpu lock. This is can (and in some circumstances does) lead to deadlocks. The function kvm_s390_set_tod_clock is called both by the SCK handler and by some IOCTLs to set the clock. The IOCTLs will not hold the vcpu lock, so they can safely take the kvm lock. The SCK handler holds the vcpu lock, but will also somehow need to acquire the kvm lock without relinquishing the vcpu lock. The solution is to factor out the code to set the clock, and provide two wrappers. One is called like the original function and does the locking, the other is called kvm_s390_try_set_tod_clock and uses trylock to try to acquire the kvm lock. This new wrapper is then used in the SCK handler. If locking fails, -EAGAIN is returned, which is eventually propagated to userspace, thus also freeing the vcpu lock and allowing for forward progress. 
This is not the most efficient or elegant way to solve this issue, but the SCK instruction is deprecated and its performance is not critical. The goal of this patch is just to provide a simple but correct way to fix the bug. Fixes: 6a3f95a6b04c ("KVM: s390: Intercept SCK instruction") Signed-off-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: Janis Schoetterl-Glausch Link: https://lore.kernel.org/r/20220301143340.111129-1-imbrenda@linux.ibm.com Cc: stable@vger.kernel.org Signed-off-by: Christian Borntraeger Stable-dep-of: 6973091d1b50 ("KVM: s390: pv: don't allow userspace to set the clock under PV") Signed-off-by: Sasha Levin --- arch/s390/kvm/kvm-s390.c | 19 ++++++++++++++++--- arch/s390/kvm/kvm-s390.h | 4 ++-- arch/s390/kvm/priv.c | 15 ++++++++++++++- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d8e9239c24ff..d12042ab5d54 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3862,14 +3862,12 @@ retry: return 0; } -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod) +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { struct kvm_vcpu *vcpu; struct kvm_s390_tod_clock_ext htod; int i; - mutex_lock(&kvm->lock); preempt_disable(); get_tod_clock_ext((char *)&htod); @@ -3890,9 +3888,24 @@ void kvm_s390_set_tod_clock(struct kvm *kvm, kvm_s390_vcpu_unblock_all(kvm); preempt_enable(); +} + +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + mutex_lock(&kvm->lock); + __kvm_s390_set_tod_clock(kvm, gtod); mutex_unlock(&kvm->lock); } +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + if (!mutex_trylock(&kvm->lock)) + return 0; + __kvm_s390_set_tod_clock(kvm, gtod); + mutex_unlock(&kvm->lock); + return 1; +} + /** * kvm_arch_fault_in_page - fault-in guest page if necessary * @vcpu: The 
corresponding virtual cpu diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index a3e9b71d426f..80bfe9c3364a 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -326,8 +326,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod); +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 3b1a498e58d2..e34d518dd3d3 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -102,7 +102,20 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_cond(vcpu, rc); VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod); - kvm_s390_set_tod_clock(vcpu->kvm, &gtod); + /* + * To set the TOD clock the kvm lock must be taken, but the vcpu lock + * is already held in handle_set_clock. The usual lock order is the + * opposite. As SCK is deprecated and should not be used in several + * cases, for example when the multiple epoch facility or TOD clock + * steering facility is installed (see Principles of Operation), a + * slow path can be used. If the lock can not be taken via try_lock, + * the instruction will be retried via -EAGAIN at a later point in + * time.
+ */ + if (!kvm_s390_try_set_tod_clock(vcpu->kvm, &gtod)) { + kvm_s390_retry_instr(vcpu); + return -EAGAIN; + } kvm_s390_set_psw_cc(vcpu, 0); return 0; From 2bf8b1c111fff86317a61cdb92c3a5a10525e68a Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Tue, 11 Oct 2022 18:07:12 +0200 Subject: [PATCH 0704/1477] KVM: s390: pv: don't allow userspace to set the clock under PV [ Upstream commit 6973091d1b50ab4042f6a2d495f59e9db3662ab8 ] When running under PV, the guest's TOD clock is under control of the ultravisor and the hypervisor isn't allowed to change it. Hence, don't allow userspace to change the guest's TOD clock by returning -EOPNOTSUPP. When userspace changes the guest's TOD clock, KVM updates its kvm.arch.epoch field and, in addition, the epoch field in all state descriptions of all VCPUs. But, under PV, the ultravisor will ignore the epoch field in the state description and simply overwrite it on next SIE exit with the actual guest epoch. This leads to KVM having an incorrect view of the guest's TOD clock: it has updated its internal kvm.arch.epoch field, but the ultravisor ignores the field in the state description. Whenever a guest is now waiting for a clock comparator, KVM will incorrectly calculate the time when the guest should wake up, possibly causing the guest to sleep for much longer than expected. With this change, kvm_s390_set_tod() will now take the kvm->lock to be able to call kvm_s390_pv_is_protected(). Since kvm_s390_set_tod_clock() also takes kvm->lock, use __kvm_s390_set_tod_clock() instead. The function kvm_s390_set_tod_clock is now unused, hence remove it. Update the documentation to indicate the TOD clock attr calls can now return -EOPNOTSUPP.
Fixes: 0f3035047140 ("KVM: s390: protvirt: Do only reset registers that are accessible") Reported-by: Marc Hartmayer Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20221011160712.928239-2-nrb@linux.ibm.com Message-Id: <20221011160712.928239-2-nrb@linux.ibm.com> Signed-off-by: Janosch Frank Signed-off-by: Sasha Levin --- Documentation/virt/kvm/devices/vm.rst | 3 +++ arch/s390/kvm/kvm-s390.c | 26 +++++++++++++++++--------- arch/s390/kvm/kvm-s390.h | 1 - 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst index 0aa5b1cfd700..60acc39e0e93 100644 --- a/Documentation/virt/kvm/devices/vm.rst +++ b/Documentation/virt/kvm/devices/vm.rst @@ -215,6 +215,7 @@ KVM_S390_VM_TOD_EXT). :Parameters: address of a buffer in user space to store the data (u8) to :Returns: -EFAULT if the given address is not accessible from kernel space; -EINVAL if setting the TOD clock extension to != 0 is not supported + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW ----------------------------------- @@ -224,6 +225,7 @@ the POP (u64). :Parameters: address of a buffer in user space to store the data (u64) to :Returns: -EFAULT if the given address is not accessible from kernel space + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT ----------------------------------- @@ -237,6 +239,7 @@ it, it is stored as 0 and not allowed to be set to a value != 0. (kvm_s390_vm_tod_clock) to :Returns: -EFAULT if the given address is not accessible from kernel space; -EINVAL if setting the TOD clock extension to != 0 is not supported + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 4. 
GROUP: KVM_S390_VM_CRYPTO ============================ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d12042ab5d54..59db85fb63e1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1092,6 +1092,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm, return 0; } +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); + static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) { struct kvm_s390_vm_tod_clock gtod; @@ -1101,7 +1103,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) return -EINVAL; - kvm_s390_set_tod_clock(kvm, &gtod); + __kvm_s390_set_tod_clock(kvm, &gtod); VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", gtod.epoch_idx, gtod.tod); @@ -1132,7 +1134,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) sizeof(gtod.tod))) return -EFAULT; - kvm_s390_set_tod_clock(kvm, &gtod); + __kvm_s390_set_tod_clock(kvm, &gtod); VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); return 0; } @@ -1144,6 +1146,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) if (attr->flags) return -EINVAL; + mutex_lock(&kvm->lock); + /* + * For protected guests, the TOD is managed by the ultravisor, so trying + * to change it will never bring the expected results.
+ */ + if (kvm_s390_pv_is_protected(kvm)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + switch (attr->attr) { case KVM_S390_VM_TOD_EXT: ret = kvm_s390_set_tod_ext(kvm, attr); @@ -1158,6 +1170,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) ret = -ENXIO; break; } + +out_unlock: + mutex_unlock(&kvm->lock); return ret; } @@ -3890,13 +3905,6 @@ static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_t preempt_enable(); } -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) -{ - mutex_lock(&kvm->lock); - __kvm_s390_set_tod_clock(kvm, gtod); - mutex_unlock(&kvm->lock); -} - int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { if (!mutex_trylock(&kvm->lock)) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 80bfe9c3364a..b6ff64796af9 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -326,7 +326,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); From bc4591a86b8fe322944776bb1c92a3f2daf31ebf Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 3 Nov 2022 17:05:37 +0800 Subject: [PATCH 0705/1477] net: lapbether: fix issue of dev reference count leakage in lapbeth_device_event() [ Upstream commit 531705a765493655472c993627106e19f7e5a6d2 ] When following tests are performed, it will cause dev reference counting leakage. 
a)ip link add bond2 type bond mode balance-rr b)ip link set bond2 up c)ifenslave -f bond2 rose1 d)ip link del bond2 When new bond device is created, the default type of the bond device is ether. And the bond device is up, lapbeth_device_event() receives the message and creates a new lapbeth device. In this case, the reference count value of dev is hold once. But after "ifenslave -f bond2 rose1" command is executed, the type of the bond device is changed to rose. When the bond device is unregistered, lapbeth_device_event() will not put the dev reference count. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wan/lapbether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index f6562a343cb4..b965eb6a4bb1 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -403,7 +403,7 @@ static int lapbeth_device_event(struct notifier_block *this, if (dev_net(dev) != &init_net) return NOTIFY_DONE; - if (!dev_is_ethdev(dev)) + if (!dev_is_ethdev(dev) && !lapbeth_get_x25_dev(dev)) return NOTIFY_DONE; switch (event) { From bcb3bb10695f555c66f18b195e4b41cdd14be450 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 3 Nov 2022 17:09:05 +0800 Subject: [PATCH 0706/1477] hamradio: fix issue of dev reference count leakage in bpq_device_event() [ Upstream commit 85cbaf032d3cd9f595152625eda5d4ecb1d6d78d ] When following tests are performed, it will cause dev reference counting leakage. a)ip link add bond2 type bond mode balance-rr b)ip link set bond2 up c)ifenslave -f bond2 rose1 d)ip link del bond2 When new bond device is created, the default type of the bond device is ether. And the bond device is up, bpq_device_event() receives the message and creates a new bpq device. In this case, the reference count value of dev is hold once. 
But after "ifenslave -f bond2 rose1" command is executed, the type of the bond device is changed to rose. When the bond device is unregistered, bpq_device_event() will not put the dev reference count. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/hamradio/bpqether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 1ad6085994b1..5c17c92add8a 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -533,7 +533,7 @@ static int bpq_device_event(struct notifier_block *this, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - if (!dev_is_ethdev(dev)) + if (!dev_is_ethdev(dev) && !bpq_get_ax25_dev(dev)) return NOTIFY_DONE; switch (event) { From f3aa8a7d9550a5f9ccbd4e0ed914a065d0741cc8 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 3 Nov 2022 01:47:05 +0000 Subject: [PATCH 0707/1477] drm/vc4: Fix missing platform_unregister_drivers() call in vc4_drm_register() [ Upstream commit cf53db768a8790fdaae2fa3a81322b080285f7e5 ] A problem about modprobe vc4 failed is triggered with the following log given: [ 420.327987] Error: Driver 'vc4_hvs' is already registered, aborting... [ 420.333904] failed to register platform driver vc4_hvs_driver [vc4]: -16 modprobe: ERROR: could not insert 'vc4': Device or resource busy The reason is that vc4_drm_register() returns platform_driver_register() directly without checking its return value, if platform_driver_register() fails, it returns without unregistering all the vc4 drivers, resulting the vc4 can never be installed later. A simple call graph is shown as below: vc4_drm_register() platform_register_drivers() # all vc4 drivers are registered platform_driver_register() driver_register() bus_add_driver() priv = kzalloc(...) 
# OOM happened # return without unregister drivers Fixing this problem by checking the return value of platform_driver_register() and do platform_unregister_drivers() if error happened. Fixes: c8b75bca92cb ("drm/vc4: Add KMS support for Raspberry Pi.") Signed-off-by: Yuan Can Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20221103014705.109322-1-yuancan@huawei.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_drv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 52426bc8edb8..888aec1bbeee 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -404,7 +404,12 @@ static int __init vc4_drm_register(void) if (ret) return ret; - return platform_driver_register(&vc4_platform_driver); + ret = platform_driver_register(&vc4_platform_driver); + if (ret) + platform_unregister_drivers(component_drivers, + ARRAY_SIZE(component_drivers)); + + return ret; } static void __exit vc4_drm_unregister(void) From 02f8dfee7580b65449a67baa65cc2da4e5ffc473 Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Fri, 4 Nov 2022 10:27:23 +0800 Subject: [PATCH 0708/1477] tcp: prohibit TCP_REPAIR_OPTIONS if data was already sent [ Upstream commit 0c175da7b0378445f5ef53904247cfbfb87e0b78 ] If setsockopt with option name of TCP_REPAIR_OPTIONS and opt_code of TCPOPT_SACK_PERM is called to enable sack after data is sent and dupacks are received , it will trigger a warning in function tcp_verify_left_out() as follows: ============================================ WARNING: CPU: 8 PID: 0 at net/ipv4/tcp_input.c:2132 tcp_timeout_mark_lost+0x154/0x160 tcp_enter_loss+0x2b/0x290 tcp_retransmit_timer+0x50b/0x640 tcp_write_timer_handler+0x1c8/0x340 tcp_write_timer+0xe5/0x140 call_timer_fn+0x3a/0x1b0 __run_timers.part.0+0x1bf/0x2d0 run_timer_softirq+0x43/0xb0 __do_softirq+0xfd/0x373 __irq_exit_rcu+0xf6/0x140 The warning is caused in the following steps: 1. 
a socket named socketA is created 2. socketA enters repair mode without build a connection 3. socketA calls connect() and its state is changed to TCP_ESTABLISHED directly 4. socketA leaves repair mode 5. socketA calls sendmsg() to send data, packets_out and sack_outs(dup ack receives) increase 6. socketA enters repair mode again 7. socketA calls setsockopt with TCPOPT_SACK_PERM to enable sack 8. retransmit timer expires, it calls tcp_timeout_mark_lost(), lost_out increases 9. sack_outs + lost_out > packets_out triggers since lost_out and sack_outs increase repeatly In function tcp_timeout_mark_lost(), tp->sacked_out will be cleared if Step7 not happen and the warning will not be triggered. As suggested by Denis and Eric, TCP_REPAIR_OPTIONS should be prohibited if data was already sent. socket-tcp tests in CRIU has been tested as follows: $ sudo ./test/zdtm.py run -t zdtm/static/socket-tcp* --keep-going \ --ignore-taint socket-tcp* represent all socket-tcp tests in test/zdtm/static/. Fixes: b139ba4e90dc ("tcp: Repair connection-time negotiated parameters") Signed-off-by: Lu Wei Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a7127364253c..cc588bc2b11d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3291,7 +3291,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_REPAIR_OPTIONS: if (!tp->repair) err = -EINVAL; - else if (sk->sk_state == TCP_ESTABLISHED) + else if (sk->sk_state == TCP_ESTABLISHED && !tp->bytes_sent) err = tcp_repair_options_est(sk, optval, optlen); else err = -EPERM; From a033b86c7f7621fde31f0364af8986f43b44914f Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 4 Nov 2022 11:32:16 +0100 Subject: [PATCH 0709/1477] ipv6: addrlabel: fix infoleak when sending struct ifaddrlblmsg to network [ Upstream commit c23fb2c82267638f9d206cb96bb93e1f93ad7828 ] When copying a `struct ifaddrlblmsg` to the network, __ifal_reserved remained uninitialized, resulting in a 1-byte infoleak: BUG: KMSAN: kernel-network-infoleak in __netdev_start_xmit ./include/linux/netdevice.h:4841 __netdev_start_xmit ./include/linux/netdevice.h:4841 netdev_start_xmit ./include/linux/netdevice.h:4857 xmit_one net/core/dev.c:3590 dev_hard_start_xmit+0x1dc/0x800 net/core/dev.c:3606 __dev_queue_xmit+0x17e8/0x4350 net/core/dev.c:4256 dev_queue_xmit ./include/linux/netdevice.h:3009 __netlink_deliver_tap_skb net/netlink/af_netlink.c:307 __netlink_deliver_tap+0x728/0xad0 net/netlink/af_netlink.c:325 netlink_deliver_tap net/netlink/af_netlink.c:338 __netlink_sendskb net/netlink/af_netlink.c:1263 netlink_sendskb+0x1d9/0x200 net/netlink/af_netlink.c:1272 netlink_unicast+0x56d/0xf50 net/netlink/af_netlink.c:1360 nlmsg_unicast ./include/net/netlink.h:1061 rtnl_unicast+0x5a/0x80 net/core/rtnetlink.c:758 ip6addrlbl_get+0xfad/0x10f0 net/ipv6/addrlabel.c:628 rtnetlink_rcv_msg+0xb33/0x1570 net/core/rtnetlink.c:6082 ... 
Uninit was created at: slab_post_alloc_hook+0x118/0xb00 mm/slab.h:742 slab_alloc_node mm/slub.c:3398 __kmem_cache_alloc_node+0x4f2/0x930 mm/slub.c:3437 __do_kmalloc_node mm/slab_common.c:954 __kmalloc_node_track_caller+0x117/0x3d0 mm/slab_common.c:975 kmalloc_reserve net/core/skbuff.c:437 __alloc_skb+0x27a/0xab0 net/core/skbuff.c:509 alloc_skb ./include/linux/skbuff.h:1267 nlmsg_new ./include/net/netlink.h:964 ip6addrlbl_get+0x490/0x10f0 net/ipv6/addrlabel.c:608 rtnetlink_rcv_msg+0xb33/0x1570 net/core/rtnetlink.c:6082 netlink_rcv_skb+0x299/0x550 net/netlink/af_netlink.c:2540 rtnetlink_rcv+0x26/0x30 net/core/rtnetlink.c:6109 netlink_unicast_kernel net/netlink/af_netlink.c:1319 netlink_unicast+0x9ab/0xf50 net/netlink/af_netlink.c:1345 netlink_sendmsg+0xebc/0x10f0 net/netlink/af_netlink.c:1921 ... This patch ensures that the reserved field is always initialized. Reported-by: syzbot+3553517af6020c4f2813f1003fe76ef3cbffe98d@syzkaller.appspotmail.com Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.") Signed-off-by: Alexander Potapenko Reviewed-by: David Ahern Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/ipv6/addrlabel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 8a22486cf270..17ac45aa7194 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -437,6 +437,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, { struct ifaddrlblmsg *ifal = nlmsg_data(nlh); ifal->ifal_family = AF_INET6; + ifal->__ifal_reserved = 0; ifal->ifal_prefixlen = prefixlen; ifal->ifal_flags = 0; ifal->ifal_index = ifindex; From d68fa77ee3d03bad6fe84e89759ddf7005f9e9c6 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 28 Oct 2022 16:56:50 +0800 Subject: [PATCH 0710/1477] can: af_can: fix NULL pointer dereference in can_rx_register() [ Upstream commit 8aa59e355949442c408408c2d836e561794c40a1 ] It causes NULL pointer dereference when testing as following: (a) use syscall(__NR_socket, 0x10ul, 3ul, 0) to create netlink socket. (b) use syscall(__NR_sendmsg, ...) to create bond link device and vxcan link device, and bind vxcan device to bond device (can also use ifenslave command to bind vxcan device to bond device). (c) use syscall(__NR_socket, 0x1dul, 3ul, 1) to create CAN socket. (d) use syscall(__NR_bind, ...) to bind the bond device to CAN socket. The bond device invokes the can-raw protocol registration interface to receive CAN packets. However, ml_priv is not allocated to the dev, dev_rcv_lists is assigned to NULL in can_rx_register(). In this case, it will occur the NULL pointer dereference issue. 
The following is the stack information: BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 122a4067 P4D 122a4067 PUD 1223c067 PMD 0 Oops: 0000 [#1] PREEMPT SMP RIP: 0010:can_rx_register+0x12d/0x1e0 Call Trace: raw_enable_filters+0x8d/0x120 raw_enable_allfilters+0x3b/0x130 raw_bind+0x118/0x4f0 __sys_bind+0x163/0x1a0 __x64_sys_bind+0x1e/0x30 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 4e096a18867a ("net: introduce CAN specific pointer in the struct net_device") Signed-off-by: Zhengchao Shao Reviewed-by: Marc Kleine-Budde Link: https://lore.kernel.org/all/20221028085650.170470-1-shaozhengchao@huawei.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- net/can/af_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 1c95ede2c9a6..cf554e855521 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -451,7 +451,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, /* insert new receiver (dev,canid,mask) -> (func,data) */ - if (dev && dev->type != ARPHRD_CAN) + if (dev && (dev->type != ARPHRD_CAN || !can_get_ml_priv(dev))) return -ENODEV; if (dev && !net_eq(net, dev_net(dev))) From cb6d639bb1efaa4d6ec28a49a98cb23bc19559a7 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 4 Nov 2022 09:30:04 +0100 Subject: [PATCH 0711/1477] net: stmmac: dwmac-meson8b: fix meson8b_devm_clk_prepare_enable() [ Upstream commit ed4314f7729714d788698ade4f9905ee5378ebc0 ] There are two problems with meson8b_devm_clk_prepare_enable(), introduced in commit a54dc4a49045 ("net: stmmac: dwmac-meson8b: Make the clock enabling code re-usable"): - It doesn't pass the clk argument, but instead always the rgmii_tx_clk of the device. - It silently ignores the return value of devm_add_action_or_reset(). 
The former didn't become an actual bug until another user showed up in the next commit 9308c47640d5 ("net: stmmac: dwmac-meson8b: add support for the RX delay configuration"). The latter means the callers could end up with the clock not actually prepared/enabled. Fixes: a54dc4a49045 ("net: stmmac: dwmac-meson8b: Make the clock enabling code re-usable") Signed-off-by: Rasmus Villemoes Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20221104083004.2212520-1-linux@rasmusvillemoes.dk Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 752658ec7bee..50ef68497bce 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -261,11 +261,9 @@ static int meson8b_devm_clk_prepare_enable(struct meson8b_dwmac *dwmac, if (ret) return ret; - devm_add_action_or_reset(dwmac->dev, - (void(*)(void *))clk_disable_unprepare, - dwmac->rgmii_tx_clk); - - return 0; + return devm_add_action_or_reset(dwmac->dev, + (void(*)(void *))clk_disable_unprepare, + clk); } static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac) From fbb4e8e6dc7b38b3007354700f03c8ad2d9a2118 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 5 Nov 2022 17:02:45 +0800 Subject: [PATCH 0712/1477] net: broadcom: Fix BCMGENET Kconfig [ Upstream commit 8d820bc9d12b8beebca836cceaf2bbe68216c2f8 ] While BCMGENET select BROADCOM_PHY as y, but PTP_1588_CLOCK_OPTIONAL is m, kconfig warning and build errors: WARNING: unmet direct dependencies detected for BROADCOM_PHY Depends on [m]: NETDEVICES [=y] && PHYLIB [=y] && PTP_1588_CLOCK_OPTIONAL [=m] Selected by [y]: - BCMGENET [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_BROADCOM [=y] && HAS_IOMEM [=y] && ARCH_BCM2835 [=y] 
drivers/net/phy/broadcom.o: In function `bcm54xx_suspend': broadcom.c:(.text+0x6ac): undefined reference to `bcm_ptp_stop' drivers/net/phy/broadcom.o: In function `bcm54xx_phy_probe': broadcom.c:(.text+0x784): undefined reference to `bcm_ptp_probe' drivers/net/phy/broadcom.o: In function `bcm54xx_config_init': broadcom.c:(.text+0xd4c): undefined reference to `bcm_ptp_config_init' Fixes: 99addbe31f55 ("net: broadcom: Select BROADCOM_PHY for BCMGENET") Signed-off-by: YueHaibing Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20221105090245.8508-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 7b79528d6eed..2b6d929d462f 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -69,7 +69,7 @@ config BCMGENET select BCM7XXX_PHY select MDIO_BCM_UNIMAC select DIMLIB - select BROADCOM_PHY if ARCH_BCM2835 + select BROADCOM_PHY if (ARCH_BCM2835 && PTP_1588_CLOCK_OPTIONAL) help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset. From f31dd158580940938f77514b87337a777520185a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 4 Nov 2022 16:48:53 -0400 Subject: [PATCH 0713/1477] tipc: fix the msg->req tlv len check in tipc_nl_compat_name_table_dump_header [ Upstream commit 1c075b192fe41030457cd4a5f7dea730412bca40 ] This is a follow-up for commit 974cb0e3e7c9 ("tipc: fix uninit-value in tipc_nl_compat_name_table_dump") where it should have type casted sizeof(..) to int to work when TLV_GET_DATA_LEN() returns a negative value. syzbot reported a call trace because of it: BUG: KMSAN: uninit-value in ... 
tipc_nl_compat_name_table_dump+0x841/0xea0 net/tipc/netlink_compat.c:934 __tipc_nl_compat_dumpit+0xab2/0x1320 net/tipc/netlink_compat.c:238 tipc_nl_compat_dumpit+0x991/0xb50 net/tipc/netlink_compat.c:321 tipc_nl_compat_recv+0xb6e/0x1640 net/tipc/netlink_compat.c:1324 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline] genl_family_rcv_msg net/netlink/genetlink.c:775 [inline] genl_rcv_msg+0x103f/0x1260 net/netlink/genetlink.c:792 netlink_rcv_skb+0x3a5/0x6c0 net/netlink/af_netlink.c:2501 genl_rcv+0x3c/0x50 net/netlink/genetlink.c:803 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0xf3b/0x1270 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x1288/0x1440 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] Reported-by: syzbot+e5dbaaa238680ce206ea@syzkaller.appspotmail.com Fixes: 974cb0e3e7c9 ("tipc: fix uninit-value in tipc_nl_compat_name_table_dump") Signed-off-by: Xin Long Link: https://lore.kernel.org/r/ccd6a7ea801b15aec092c3b532a883b4c5708695.1667594933.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tipc/netlink_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 49e893313652..2d62932b5987 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -877,7 +877,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) }; ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); - if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query)) + if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query)) return -EINVAL; depth = ntohl(ntq->depth); From 6e2ffae69d17da317a892cfd6a3c24c8a8bbaed3 Mon Sep 17 00:00:00 2001 From: Doug Brown Date: Mon, 5 Sep 2022 17:07:09 -0700 Subject: [PATCH 0714/1477] dmaengine: pxa_dma: use platform_get_irq_optional [ Upstream commit 
b3d726cb8497c6b12106fd617d46eef11763ea86 ] The first IRQ is required, but IRQs 1 through (nb_phy_chans - 1) are optional, because on some platforms (e.g. PXA168) there is a single IRQ shared between all channels. This change inhibits a flood of "IRQ index # not found" messages at startup. Tested on a PXA168-based device. Fixes: 7723f4c5ecdb ("driver core: platform: Add an error message to platform_get_irq*()") Signed-off-by: Doug Brown Link: https://lore.kernel.org/r/20220906000709.52705-1-doug@schmorgal.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/pxa_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index b4ef4f19f7de..68d9d60c051d 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -1249,14 +1249,14 @@ static int pxad_init_phys(struct platform_device *op, return -ENOMEM; for (i = 0; i < nb_phy_chans; i++) - if (platform_get_irq(op, i) > 0) + if (platform_get_irq_optional(op, i) > 0) nr_irq++; for (i = 0; i < nb_phy_chans; i++) { phy = &pdev->phys[i]; phy->base = pdev->base; phy->idx = i; - irq = platform_get_irq(op, i); + irq = platform_get_irq_optional(op, i); if ((nr_irq > 1) && (irq > 0)) ret = devm_request_irq(&op->dev, irq, pxad_chan_handler, From 0b7ee3d50f32d277bf024b4ddb4de54da43a3025 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 24 Oct 2022 21:50:09 +0200 Subject: [PATCH 0715/1477] dmaengine: mv_xor_v2: Fix a resource leak in mv_xor_v2_remove() [ Upstream commit 081195d17a0c4c636da2b869bd5809d42e8cbb13 ] A clk_prepare_enable() call in the probe is not balanced by a corresponding clk_disable_unprepare() in the remove function. Add the missing call. 
Fixes: 3cd2c313f1d6 ("dmaengine: mv_xor_v2: Fix clock resource by adding a register clock") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/e9e3837a680c9bd2438e4db2b83270c6c052d005.1666640987.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/mv_xor_v2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 9b0d463f89bb..4800c596433a 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -899,6 +899,7 @@ static int mv_xor_v2_remove(struct platform_device *pdev) tasklet_kill(&xor_dev->irq_tasklet); clk_disable_unprepare(xor_dev->clk); + clk_disable_unprepare(xor_dev->reg_clk); return 0; } From 3a4a3c3b1fe6f9e2580abf1f5cf57f6c47c98111 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 7 Nov 2022 12:30:32 +0800 Subject: [PATCH 0716/1477] drivers: net: xgene: disable napi when register irq failed in xgene_enet_open() [ Upstream commit ce9e57feeed81d17d5e80ed86f516ff0d39c3867 ] When failed to register irq in xgene_enet_open() for opening device, napi isn't disabled. When open xgene device next time, it will report an invalid opcode issue. Fix it. Only be compiled, not be tested.
Fixes: aeb20b6b3f4e ("drivers: net: xgene: fix: ifconfig up/down crash") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221107043032.357673-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 78c7cbc372b0..71151f675a49 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1004,8 +1004,10 @@ static int xgene_enet_open(struct net_device *ndev) xgene_enet_napi_enable(pdata); ret = xgene_enet_register_irq(ndev); - if (ret) + if (ret) { + xgene_enet_napi_disable(pdata); return ret; + } if (ndev->phydev) { phy_start(ndev->phydev); From cf4853880e2433909880145aad650f52554662bd Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Tue, 18 Oct 2022 14:26:04 +0530 Subject: [PATCH 0717/1477] perf stat: Fix printing os->prefix in CSV metrics output [ Upstream commit ad353b710c7493df3d4fc2d3a51819126bed2e81 ] 'perf stat' with CSV output option prints an extra empty string as first field in metrics output line. Sample output below: # ./perf stat -x, --per-socket -a -C 1 ls S0,1,1.78,msec,cpu-clock,1785146,100.00,0.973,CPUs utilized S0,1,26,,context-switches,1781750,100.00,0.015,M/sec S0,1,1,,cpu-migrations,1780526,100.00,0.561,K/sec S0,1,1,,page-faults,1779060,100.00,0.561,K/sec S0,1,875807,,cycles,1769826,100.00,0.491,GHz S0,1,85281,,stalled-cycles-frontend,1767512,100.00,9.74,frontend cycles idle S0,1,576839,,stalled-cycles-backend,1766260,100.00,65.86,backend cycles idle S0,1,288430,,instructions,1762246,100.00,0.33,insn per cycle ====> ,S0,1,,,,,,,2.00,stalled cycles per insn The above command line uses field separator as "," via "-x," option and per-socket option displays socket value as first field. 
But here the last line for "stalled cycles per insn" has "," in the beginning. Sample output using interval mode: # ./perf stat -I 1000 -x, --per-socket -a -C 1 ls 0.001813453,S0,1,1.87,msec,cpu-clock,1872052,100.00,0.002,CPUs utilized 0.001813453,S0,1,2,,context-switches,1868028,100.00,1.070,K/sec ------ 0.001813453,S0,1,85379,,instructions,1856754,100.00,0.32,insn per cycle ====> 0.001813453,,S0,1,,,,,,,1.34,stalled cycles per insn Above result also has an extra CSV separator after the timestamp. Patch addresses extra field separator in the beginning of the metric output line. The counter stats are displayed by function "perf_stat__print_shadow_stats" in code "util/stat-shadow.c". While printing the stats info for "stalled cycles per insn", function "new_line_csv" is used as new_line callback. The new_line_csv function has check for "os->prefix" and if prefix is not null, it will be printed along with CSV separator. Snippet from "new_line_csv": if (os->prefix) fprintf(os->fh, "%s%s", os->prefix, config->csv_sep); Here os->prefix gets printed followed by "," which is the CSV separator. The os->prefix is used in interval mode option ( -I ), to print time stamp on every new line. But prefix is already set to contain CSV separator when used in interval mode for CSV option. Reference: Function "static void print_interval" Snippet: sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); Also if prefix is not assigned (if not used with -I option), it gets set to empty string. Reference: function printout() in util/stat-display.c Snippet: .prefix = prefix ? prefix : "", Since prefix already set to contain csv_sep in interval option, patch removes printing config->csv_sep in new_line_csv function to avoid printing extra field.
After the patch: # ./perf stat -x, --per-socket -a -C 1 ls S0,1,2.04,msec,cpu-clock,2045202,100.00,1.013,CPUs utilized S0,1,2,,context-switches,2041444,100.00,979.289,/sec S0,1,0,,cpu-migrations,2040820,100.00,0.000,/sec S0,1,2,,page-faults,2040288,100.00,979.289,/sec S0,1,254589,,cycles,2036066,100.00,0.125,GHz S0,1,82481,,stalled-cycles-frontend,2032420,100.00,32.40,frontend cycles idle S0,1,113170,,stalled-cycles-backend,2031722,100.00,44.45,backend cycles idle S0,1,88766,,instructions,2030942,100.00,0.35,insn per cycle S0,1,,,,,,,1.27,stalled cycles per insn Fixes: 92a61f6412d3a09d ("perf stat: Implement CSV metrics output") Reported-by: Disha Goel Reviewed-By: Kajol Jain Signed-off-by: Athira Jajeev Tested-by: Disha Goel Cc: Andi Kleen Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: linuxppc-dev@lists.ozlabs.org Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Link: https://lore.kernel.org/r/20221018085605.63834-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 96fe9c1af336..4688e39de52a 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -203,7 +203,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx) fputc('\n', os->fh); if (os->prefix) - fprintf(os->fh, "%s%s", os->prefix, config->csv_sep); + fprintf(os->fh, "%s", os->prefix); aggr_printout(config, os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) fputs(config->csv_sep, os->fh); From 5333cf1b7f6861912aff6263978d4781f9858e47 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 8 Nov 2022 10:56:07 +0800 Subject: [PATCH 0718/1477] net: marvell: prestera: fix memory leak in prestera_rxtx_switch_init() [ Upstream commit 519b58bbfa825f042fcf80261cc18e1e35f85ffd ] When prestera_sdma_switch_init() 
failed, the memory pointed to by sw->rxtx isn't released. Fix it. Only be compiled, not be tested. Fixes: 501ef3066c89 ("net: marvell: prestera: Add driver for Prestera family ASIC devices") Signed-off-by: Zhengchao Shao Reviewed-by: Vadym Kochan Link: https://lore.kernel.org/r/20221108025607.338450-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/prestera/prestera_rxtx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c index 2a13c318048c..59a3ea02b8ad 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c @@ -771,6 +771,7 @@ tx_done: int prestera_rxtx_switch_init(struct prestera_switch *sw) { struct prestera_rxtx *rxtx; + int err; rxtx = kzalloc(sizeof(*rxtx), GFP_KERNEL); if (!rxtx) @@ -778,7 +779,11 @@ int prestera_rxtx_switch_init(struct prestera_switch *sw) sw->rxtx = rxtx; - return prestera_sdma_switch_init(sw); + err = prestera_sdma_switch_init(sw); + if (err) + kfree(rxtx); + + return err; } void prestera_rxtx_switch_fini(struct prestera_switch *sw) From bdd282bba72de096145d75ef9db40792c9c710ea Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 7 Nov 2022 18:14:43 +0800 Subject: [PATCH 0719/1477] net: nixge: disable napi when enable interrupts failed in nixge_open() [ Upstream commit b06334919c7a068d54ba5b219c05e919d89943f7 ] When failed to enable interrupts in nixge_open() for opening device, napi isn't disabled. When open nixge device next time, it will report an invalid opcode issue. Fix it. Only be compiled, not be tested.
Fixes: 492caffa8a1a ("net: ethernet: nixge: Add support for National Instruments XGE netdev") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221107101443.120205-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ni/nixge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index a6861df9904f..9c48fd85c418 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -899,6 +899,7 @@ static int nixge_open(struct net_device *ndev) err_rx_irq: free_irq(priv->tx_irq, ndev); err_tx_irq: + napi_disable(&priv->napi); phy_stop(phy); phy_disconnect(phy); tasklet_kill(&priv->dma_err_tasklet); From eb6fa0ac2a9c14b700ee7a2bb55736a0bd14a099 Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Wed, 2 Nov 2022 23:55:38 -0700 Subject: [PATCH 0720/1477] net/mlx5: Allow async trigger completion execution on single CPU systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2808b37b59288ad8f1897e3546c2296df3384b65 ] For a single CPU system, the kernel thread executing mlx5_cmd_flush() never releases the CPU but calls down_trylock(&cmd->sem) in a busy loop. On a single processor system, this leads to a deadlock as the kernel thread which executes mlx5_cmd_invoke() never gets scheduled. Fix this, by adding the cond_resched() call to the loop, allow the command completion kernel thread to execute.
Fixes: 8e715cd613a1 ("net/mlx5: Set command entry semaphore up once got index free") Signed-off-by: Alexander Schmidt Signed-off-by: Roy Novich Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 6612b2c0be48..cf07318048df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1687,12 +1687,17 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev) struct mlx5_cmd *cmd = &dev->cmd; int i; - for (i = 0; i < cmd->max_reg_cmds; i++) - while (down_trylock(&cmd->sem)) + for (i = 0; i < cmd->max_reg_cmds; i++) { + while (down_trylock(&cmd->sem)) { mlx5_cmd_trigger_completions(dev); + cond_resched(); + } + } - while (down_trylock(&cmd->pages_sem)) + while (down_trylock(&cmd->pages_sem)) { mlx5_cmd_trigger_completions(dev); + cond_resched(); + } /* Unlock cmdif */ up(&cmd->pages_sem); From 3b27e20601abe48ae4baf9cb0cb0e3dcfdff4525 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 2 Nov 2022 23:55:46 -0700 Subject: [PATCH 0721/1477] net/mlx5e: E-Switch, Fix comparing termination table instance [ Upstream commit f4f4096b410e8d31c3f07f39de3b17d144edd53d ] The pkt_reformat pointer being saved under flow_act and not dest attribute in the termination table instance. Fix the comparison pointers. Also fix returning success if one pkt_reformat pointer is null and the other is not. 
Fixes: 249ccc3c95bd ("net/mlx5e: Add support for offloading traffic from uplink to uplink") Signed-off-by: Roi Dayan Reviewed-by: Chris Mi Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/eswitch_offloads_termtbl.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 1cbb330b9f42..6c865cb7f445 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -30,9 +30,9 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, sizeof(dest->vport.num), hash); hash = jhash((const void *)&dest->vport.vhca_id, sizeof(dest->vport.num), hash); - if (dest->vport.pkt_reformat) - hash = jhash(dest->vport.pkt_reformat, - sizeof(*dest->vport.pkt_reformat), + if (flow_act->pkt_reformat) + hash = jhash(flow_act->pkt_reformat, + sizeof(*flow_act->pkt_reformat), hash); return hash; } @@ -53,9 +53,11 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, if (ret) return ret; - return dest1->vport.pkt_reformat && dest2->vport.pkt_reformat ? - memcmp(dest1->vport.pkt_reformat, dest2->vport.pkt_reformat, - sizeof(*dest1->vport.pkt_reformat)) : 0; + if (flow_act1->pkt_reformat && flow_act2->pkt_reformat) + return memcmp(flow_act1->pkt_reformat, flow_act2->pkt_reformat, + sizeof(*flow_act1->pkt_reformat)); + + return !(flow_act1->pkt_reformat == flow_act2->pkt_reformat); } static int From fd52dd2d6e2f5e34bed00e654d06e02c6bef39d4 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 09:15:37 +0800 Subject: [PATCH 0722/1477] net: cpsw: disable napi in cpsw_ndo_open() [ Upstream commit 6d47b53fb3f363a74538a1dbd09954af3d8d4131 ] When failed to create xdp rxqs or fill rx channels in cpsw_ndo_open() for opening device, napi isn't disabled. 
When open cpsw device next time, it will report an invalid opcode issue. Compiled tested only. Fixes: d354eb85d618 ("drivers: net: cpsw: dual_emac: simplify napi usage") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109011537.96975-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b0f00b4edd94..5af0f9f8c097 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -864,6 +864,8 @@ static int cpsw_ndo_open(struct net_device *ndev) err_cleanup: if (!cpsw->usage_count) { + napi_disable(&cpsw->napi_rx); + napi_disable(&cpsw->napi_tx); cpdma_ctlr_stop(cpsw->dma); cpsw_destroy_xdp_rxqs(cpsw); } From 38aa7ed8c2c3cace4b542f2a3369271878b45e74 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 10:14:51 +0800 Subject: [PATCH 0723/1477] net: cxgb3_main: disable napi when bind qsets failed in cxgb_up() [ Upstream commit d75aed1428da787cbe42bc073d76f1354f364d92 ] When failed to bind qsets in cxgb_up() for opening device, napi isn't disabled. When open cxgb3 device next time, it will trigger a BUG_ON() in napi_enable(). Compile tested only.
Fixes: 48c4b6dbb7e2 ("cxgb3 - fix port up/down error path") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109021451.121490-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 84ad7261e243..8a167eea288c 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -1302,6 +1302,7 @@ static int cxgb_up(struct adapter *adap) if (ret < 0) { CH_ERR(adap, "failed to bind qsets, err %d\n", ret); t3_intr_disable(adap); + quiesce_rx(adap); free_irq_resources(adap); err = ret; goto out; From 4ad684ba028cfb5c4bf924e46855ef11c15aa5ae Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 09:21:00 +0800 Subject: [PATCH 0724/1477] cxgb4vf: shut down the adapter when t4vf_update_port_info() failed in cxgb4vf_open() [ Upstream commit c6092ea1e6d7bd12acd881f6aa2b5054cd70e096 ] When t4vf_update_port_info() failed in cxgb4vf_open(), resources applied during adapter goes up are not cleared. Fix it. Only be compiled, not be tested. 
Fixes: 18d79f721e0a ("cxgb4vf: Update port information in cxgb4vf_open()") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109012100.99132-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 2820a0bb971b..5e1e46425014 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -858,7 +858,7 @@ static int cxgb4vf_open(struct net_device *dev) */ err = t4vf_update_port_info(pi); if (err < 0) - return err; + goto err_unwind; /* * Note that this interface is up and start everything up ... From 1a4e495edfe25b42c0b59f809a8df97ad5a6eed3 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 8 Nov 2022 16:34:58 +0100 Subject: [PATCH 0725/1477] net: phy: mscc: macsec: clear encryption keys when freeing a flow [ Upstream commit 1b16b3fdf675cca15a537572bac50cc5354368fc ] Commit aaab73f8fba4 ("macsec: clear encryption keys from the stack after setting up offload") made sure to clean encryption keys from the stack after setting up offloading, but the MSCC PHY driver made a copy, kept it in the flow data and did not clear it when freeing a flow. Fix this. 
Fixes: 28c5107aa904 ("net: phy: mscc: macsec support") Signed-off-by: Antoine Tenart Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/mscc/mscc_macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c index b7b2521c73fb..c00eef457b85 100644 --- a/drivers/net/phy/mscc/mscc_macsec.c +++ b/drivers/net/phy/mscc/mscc_macsec.c @@ -632,6 +632,7 @@ static void vsc8584_macsec_free_flow(struct vsc8531_private *priv, list_del(&flow->list); clear_bit(flow->index, bitmap); + memzero_explicit(flow->key, sizeof(flow->key)); kfree(flow); } From 05b222843457391683889d319afb8998c10f9562 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 8 Nov 2022 16:34:59 +0100 Subject: [PATCH 0726/1477] net: atlantic: macsec: clear encryption keys from the stack [ Upstream commit 879785def0f5e71d54399de0f8a5cb399db14171 ] Commit aaab73f8fba4 ("macsec: clear encryption keys from the stack after setting up offload") made sure to clean encryption keys from the stack after setting up offloading, but the atlantic driver made a copy and did not clear it. Fix this. 
[4 Fixes tags below, all part of the same series, no need to split this] Fixes: 9ff40a751a6f ("net: atlantic: MACSec ingress offload implementation") Fixes: b8f8a0b7b5cb ("net: atlantic: MACSec ingress offload HW bindings") Fixes: 27736563ce32 ("net: atlantic: MACSec egress offload implementation") Fixes: 9d106c6dd81b ("net: atlantic: MACSec egress offload HW bindings") Signed-off-by: Antoine Tenart Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../net/ethernet/aquantia/atlantic/aq_macsec.c | 2 ++ .../aquantia/atlantic/macsec/macsec_api.c | 18 +++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c index 7c6e0811f2e6..ee823a18294c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c @@ -585,6 +585,7 @@ static int aq_update_txsa(struct aq_nic_s *nic, const unsigned int sc_idx, ret = aq_mss_set_egress_sakey_record(hw, &key_rec, sa_idx); + memzero_explicit(&key_rec, sizeof(key_rec)); return ret; } @@ -932,6 +933,7 @@ static int aq_update_rxsa(struct aq_nic_s *nic, const unsigned int sc_idx, ret = aq_mss_set_ingress_sakey_record(hw, &sa_key_record, sa_idx); + memzero_explicit(&sa_key_record, sizeof(sa_key_record)); return ret; } diff --git a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c index 36c7cf05630a..431924959520 100644 --- a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c +++ b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c @@ -757,6 +757,7 @@ set_ingress_sakey_record(struct aq_hw_s *hw, u16 table_index) { u16 packed_record[18]; + int ret; if (table_index >= NUMROWS_INGRESSSAKEYRECORD) return -EINVAL; @@ -789,9 +790,12 @@ set_ingress_sakey_record(struct aq_hw_s *hw, packed_record[16] = rec->key_len & 0x3; - return set_raw_ingress_record(hw, packed_record, 18, 
2, - ROWOFFSET_INGRESSSAKEYRECORD + - table_index); + ret = set_raw_ingress_record(hw, packed_record, 18, 2, + ROWOFFSET_INGRESSSAKEYRECORD + + table_index); + + memzero_explicit(packed_record, sizeof(packed_record)); + return ret; } int aq_mss_set_ingress_sakey_record(struct aq_hw_s *hw, @@ -1739,14 +1743,14 @@ static int set_egress_sakey_record(struct aq_hw_s *hw, ret = set_raw_egress_record(hw, packed_record, 8, 2, ROWOFFSET_EGRESSSAKEYRECORD + table_index); if (unlikely(ret)) - return ret; + goto clear_key; ret = set_raw_egress_record(hw, packed_record + 8, 8, 2, ROWOFFSET_EGRESSSAKEYRECORD + table_index - 32); - if (unlikely(ret)) - return ret; - return 0; +clear_key: + memzero_explicit(packed_record, sizeof(packed_record)); + return ret; } int aq_mss_set_egress_sakey_record(struct aq_hw_s *hw, From 56d3b5531bf64009adb616f06ad30695b7e70018 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 10:37:41 +0800 Subject: [PATCH 0727/1477] ethernet: s2io: disable napi when start nic failed in s2io_card_up() [ Upstream commit 0348c1ab980c1d43fb37b758d4b760990c066cb5 ] When failed to start nic or add interrupt service routine in s2io_card_up() for opening device, napi isn't disabled. When open s2io device next time, it will trigger a BUG_ON() in napi_enable(). Compile tested only.
Fixes: 5f490c968056 ("S2io: Fixed synchronization between scheduling of napi with card reset and close") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109023741.131552-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/neterion/s2io.c | 29 +++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 3cae8449fadb..8a30be698f99 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7114,9 +7114,8 @@ static int s2io_card_up(struct s2io_nic *sp) if (ret) { DBG_PRINT(ERR_DBG, "%s: Out of memory in Open\n", dev->name); - s2io_reset(sp); - free_rx_buffers(sp); - return -ENOMEM; + ret = -ENOMEM; + goto err_fill_buff; } DBG_PRINT(INFO_DBG, "Buf in ring:%d is %d:\n", i, ring->rx_bufs_left); @@ -7154,18 +7153,16 @@ static int s2io_card_up(struct s2io_nic *sp) /* Enable Rx Traffic and interrupts on the NIC */ if (start_nic(sp)) { DBG_PRINT(ERR_DBG, "%s: Starting NIC failed\n", dev->name); - s2io_reset(sp); - free_rx_buffers(sp); - return -ENODEV; + ret = -ENODEV; + goto err_out; } /* Add interrupt service routine */ if (s2io_add_isr(sp) != 0) { if (sp->config.intr_type == MSI_X) s2io_rem_isr(sp); - s2io_reset(sp); - free_rx_buffers(sp); - return -ENODEV; + ret = -ENODEV; + goto err_out; } timer_setup(&sp->alarm_timer, s2io_alarm_handle, 0); @@ -7185,6 +7182,20 @@ static int s2io_card_up(struct s2io_nic *sp) } return 0; + +err_out: + if (config->napi) { + if (config->intr_type == MSI_X) { + for (i = 0; i < sp->config.rx_ring_num; i++) + napi_disable(&sp->mac_control.rings[i].napi); + } else { + napi_disable(&sp->napi); + } + } +err_fill_buff: + s2io_reset(sp); + free_rx_buffers(sp); + return ret; } /** From dd7beaec8b48916acd98d01dd24e75e6fdba3d9e Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 10:54:32 +0800 Subject: [PATCH 
0728/1477] net: mv643xx_eth: disable napi when init rxq or txq failed in mv643xx_eth_open() [ Upstream commit f111606b63ff2282428ffbac0447c871eb957b6c ] When failed to init rxq or txq in mv643xx_eth_open() for opening device, napi isn't disabled. When open mv643xx_eth device next time, it will trigger a BUG_ON() in napi_enable(). Compile tested only. Fixes: 2257e05c1705 ("mv643xx_eth: get rid of receive-side locking") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109025432.80900-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mv643xx_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 90e6111ce534..735b76effc49 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2471,6 +2471,7 @@ out_free: for (i = 0; i < mp->rxq_count; i++) rxq_deinit(mp->rxq + i); out: + napi_disable(&mp->napi); free_irq(dev->irq, dev); return err; From 59ec132386a07ac09165f96226fe6fcb44b54b37 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 12:40:16 +0800 Subject: [PATCH 0729/1477] ethernet: tundra: free irq when alloc ring failed in tsi108_open() [ Upstream commit acce40037041f97baad18142bb253064491ebde3 ] When alloc tx/rx ring failed in tsi108_open(), it doesn't free irq. Fix it. 
Fixes: 5e123b844a1c ("[PATCH] Add tsi108/9 On Chip Ethernet device driver support") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109044016.126866-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/tundra/tsi108_eth.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index c62f474b6d08..fcebd2418dbd 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1302,12 +1302,15 @@ static int tsi108_open(struct net_device *dev) data->rxring = dma_alloc_coherent(&data->pdev->dev, rxring_size, &data->rxdma, GFP_KERNEL); - if (!data->rxring) + if (!data->rxring) { + free_irq(data->irq_num, dev); return -ENOMEM; + } data->txring = dma_alloc_coherent(&data->pdev->dev, txring_size, &data->txdma, GFP_KERNEL); if (!data->txring) { + free_irq(data->irq_num, dev); dma_free_coherent(&data->pdev->dev, rxring_size, data->rxring, data->rxdma); return -ENOMEM; From 956e0216a19994443c90ba2ea6b0b284c9c4f9cb Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Wed, 9 Nov 2022 17:07:34 +0800 Subject: [PATCH 0730/1477] net: macvlan: fix memory leaks of macvlan_common_newlink [ Upstream commit 23569b5652ee8e8e55a12f7835f59af6f3cefc30 ] kmemleak reports memory leaks in macvlan_common_newlink, as follows: ip link add link eth0 name .. type macvlan mode source macaddr add kmemleak reports: unreferenced object 0xffff8880109bb140 (size 64): comm "ip", pid 284, jiffies 4294986150 (age 430.108s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 b8 aa 5a 12 80 88 ff ff ..........Z..... 
80 1b fa 0d 80 88 ff ff 1e ff ac af c7 c1 6b 6b ..............kk backtrace: [] kmem_cache_alloc_trace+0x1c7/0x300 [] macvlan_hash_add_source+0x45/0xc0 [] macvlan_changelink_sources+0xd7/0x170 [] macvlan_common_newlink+0x38c/0x5a0 [] macvlan_newlink+0xe/0x20 [] __rtnl_newlink+0x7af/0xa50 [] rtnl_newlink+0x48/0x70 ... In the scenario where the macvlan mode is configured as 'source', macvlan_changelink_sources() will be executed to reconfigure list of remote source mac addresses, at the same time, if register_netdevice() returns an error, the resource generated by macvlan_changelink_sources() is not cleaned up. Using this patch, in the case of an error, it will execute macvlan_flush_sources() to ensure that the resource is cleaned up. Fixes: aa5fd0fb7748 ("driver: macvlan: Destroy new macvlan port if macvlan_common_newlink failed.") Signed-off-by: Chuang Wang Link: https://lore.kernel.org/r/20221109090735.690500-1-nashuiliang@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/macvlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c8d803d3616c..6b269a72388b 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1509,8 +1509,10 @@ destroy_macvlan_port: /* the macvlan port may be freed by macvlan_uninit when fail to register. * so we destroy the macvlan port only when it's valid.
*/ - if (create && macvlan_port_get_rtnl(lowerdev)) + if (create && macvlan_port_get_rtnl(lowerdev)) { + macvlan_flush_sources(port, vlan); macvlan_port_destroy(port->dev); + } return err; } EXPORT_SYMBOL_GPL(macvlan_common_newlink); From e56d18a976dda653194218df6d40d8122c775712 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sat, 29 Oct 2022 19:34:50 +0800 Subject: [PATCH 0731/1477] riscv: process: fix kernel info leakage [ Upstream commit 6510c78490c490a6636e48b61eeaa6fb65981f4b ] thread_struct's s[12] may contain random kernel memory content, which may be finally leaked to userspace. This is a security hole. Fix it by clearing the s[12] array in thread_struct when fork. As for kthread case, it's better to clear the s[12] array as well. Fixes: 7db91e57a0ac ("RISC-V: Task implementation") Signed-off-by: Jisheng Zhang Tested-by: Guo Ren Link: https://lore.kernel.org/r/20221029113450.4027-1-jszhang@kernel.org Reviewed-by: Guo Ren Link: https://lore.kernel.org/r/CAJF2gTSdVyAaM12T%2B7kXAdRPGS4VyuO08X1c7paE-n4Fr8OtRA@mail.gmail.com/ Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index dd5f985b1f40..9a8b2e60adcf 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -111,6 +111,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, { struct pt_regs *childregs = task_pt_regs(p); + memset(&p->thread.s, 0, sizeof(p->thread.s)); + /* p->thread holds context to be restored by __switch_to() */ if (unlikely(p->flags & PF_KTHREAD)) { /* Kernel thread */ From ecf78af5141f079fffddddf708ffd311699cc131 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 1 Nov 2022 02:29:43 +0800 Subject: [PATCH 0732/1477] riscv: vdso: fix build with llvm [ Upstream commit 50f4dd657a0fcf90aa8da8dc2794a8100ff4c37c ] Even after commit 89fd4a1df829 ("riscv: jump_label: mark arguments as const 
to satisfy asm constraints"), building with CC_OPTIMIZE_FOR_SIZE + LLVM=1 can reproduce below build error: CC arch/riscv/kernel/vdso/vgettimeofday.o In file included from :4: In file included from lib/vdso/gettimeofday.c:5: In file included from include/vdso/datapage.h:17: In file included from include/vdso/processor.h:10: In file included from arch/riscv/include/asm/vdso/processor.h:7: In file included from include/linux/jump_label.h:112: arch/riscv/include/asm/jump_label.h:42:3: error: invalid operand for inline asm constraint 'i' " .option push \n\t" ^ 1 error generated. I think the problem is when "-Os" is passed as CFLAGS, it's removed by "CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os" which is introduced in commit e05d57dcb8c7 ("riscv: Fixup __vdso_gettimeofday broke dynamic ftrace"), thus no optimization at all for vgettimeofday.c arm64 does remove "-Os" as well, but it forces "-O2" after removing "-Os". I compared the generated vgettimeofday.o with "-O2" and "-Os", I think no big performance difference. So let's tell the kbuild not to remove "-Os" rather than follow arm64 style. 
vdso related performance can be improved a lot when building kernel with CC_OPTIMIZE_FOR_SIZE after this commit, ("-Os" VS no optimization) Fixes: e05d57dcb8c7 ("riscv: Fixup __vdso_gettimeofday broke dynamic ftrace") Signed-off-by: Jisheng Zhang Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20221031182943.2453-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 24d936c147cd..926ab3960f9e 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -30,7 +30,7 @@ obj-y += vdso.o vdso-syms.o CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Disable -pg to prevent insert call site -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n From d7716240bca508fb9e74d00d96304bc30acc3d6c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Sat, 31 Oct 2020 14:01:12 +0800 Subject: [PATCH 0733/1477] riscv: Enable CMA support [ Upstream commit da815582cf4594e96defa1cddb72cd00b1e7aac5 ] riscv has selected HAVE_DMA_CONTIGUOUS, but doesn't call dma_contiguous_reserve(). This calls dma_contiguous_reserve(), which enables CMA. 
Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt Stable-dep-of: 50e63dd8ed92 ("riscv: fix reserved memory setup") Signed-off-by: Sasha Levin --- arch/riscv/mm/init.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index e8921e78a292..56314e82f051 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -41,13 +42,14 @@ struct pt_alloc_ops { #endif }; +static phys_addr_t dma32_phys_limit __ro_after_init; + static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, - (unsigned long) PFN_PHYS(max_low_pfn))); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; @@ -193,6 +195,7 @@ void __init setup_bootmem(void) max_pfn = PFN_DOWN(dram_end); max_low_pfn = max_pfn; + dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn); #ifdef CONFIG_BLK_DEV_INITRD @@ -206,6 +209,7 @@ void __init setup_bootmem(void) memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); early_init_fdt_scan_reserved_mem(); + dma_contiguous_reserve(dma32_phys_limit); memblock_allow_resize(); memblock_dump_all(); } From 0cf9cb06149398daab565586b56825418567a7e1 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 18 Nov 2020 16:38:27 -0800 Subject: [PATCH 0734/1477] riscv: Separate memory init from paging init [ Upstream commit cbd34f4bb37d62d8a027f54205bff07e73340da4 ] Currently, we perform some memory init functions in paging init. But, that will be an issue for NUMA support where DT needs to be flattened before numa initialization and memblock_present can only be called after numa initialization. Move memory initialization related functions to a separate function. 
Signed-off-by: Atish Patra Reviewed-by: Greentime Hu Reviewed-by: Anup Patel Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt Stable-dep-of: 50e63dd8ed92 ("riscv: fix reserved memory setup") Signed-off-by: Sasha Levin --- arch/riscv/include/asm/pgtable.h | 1 + arch/riscv/kernel/setup.c | 1 + arch/riscv/mm/init.c | 6 +++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 73e8b5e5bb65..b16304fdf448 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -470,6 +470,7 @@ extern void *dtb_early_va; extern uintptr_t dtb_early_pa; void setup_bootmem(void); void paging_init(void); +void misc_mem_init(void); #define FIRST_USER_ADDRESS 0 diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index cc85858f7fe8..57e1ab036edf 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -96,6 +96,7 @@ void __init setup_arch(char **cmdline_p) else pr_err("No DTB found in kernel mappings\n"); #endif + misc_mem_init(); #ifdef CONFIG_SWIOTLB swiotlb_init(1); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 56314e82f051..b6ab6a18dc1a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -669,8 +669,12 @@ static void __init resource_init(void) void __init paging_init(void) { setup_vm_final(); - sparse_init(); setup_zero_page(); +} + +void __init misc_mem_init(void) +{ + sparse_init(); zone_sizes_init(); resource_init(); } From 94ab8f88feb75e3b1486102c0c9c550f37d9d137 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 7 Nov 2022 15:15:25 +0000 Subject: [PATCH 0735/1477] riscv: fix reserved memory setup [ Upstream commit 50e63dd8ed92045eb70a72d7ec725488320fb68b ] Currently, RISC-V sets up reserved memory using the "early" copy of the device tree. 
As a result, when trying to get a reserved memory region using of_reserved_mem_lookup(), the pointer to reserved memory regions is using the early, pre-virtual-memory address which causes a kernel panic when trying to use the buffer's name: Unable to handle kernel paging request at virtual address 00000000401c31ac Oops [#1] Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 6.0.0-rc1-00001-g0d9d6953d834 #1 Hardware name: Microchip PolarFire-SoC Icicle Kit (DT) epc : string+0x4a/0xea ra : vsnprintf+0x1e4/0x336 epc : ffffffff80335ea0 ra : ffffffff80338936 sp : ffffffff81203be0 gp : ffffffff812e0a98 tp : ffffffff8120de40 t0 : 0000000000000000 t1 : ffffffff81203e28 t2 : 7265736572203a46 s0 : ffffffff81203c20 s1 : ffffffff81203e28 a0 : ffffffff81203d22 a1 : 0000000000000000 a2 : ffffffff81203d08 a3 : 0000000081203d21 a4 : ffffffffffffffff a5 : 00000000401c31ac a6 : ffff0a00ffffff04 a7 : ffffffffffffffff s2 : ffffffff81203d08 s3 : ffffffff81203d00 s4 : 0000000000000008 s5 : ffffffff000000ff s6 : 0000000000ffffff s7 : 00000000ffffff00 s8 : ffffffff80d9821a s9 : ffffffff81203d22 s10: 0000000000000002 s11: ffffffff80d9821c t3 : ffffffff812f3617 t4 : ffffffff812f3617 t5 : ffffffff812f3618 t6 : ffffffff81203d08 status: 0000000200000100 badaddr: 00000000401c31ac cause: 000000000000000d [] vsnprintf+0x1e4/0x336 [] vprintk_store+0xf6/0x344 [] vprintk_emit+0x56/0x192 [] vprintk_default+0x16/0x1e [] vprintk+0x72/0x80 [] _printk+0x36/0x50 [] print_reserved_mem+0x1c/0x24 [] paging_init+0x528/0x5bc [] setup_arch+0xd0/0x592 [] start_kernel+0x82/0x73c early_init_fdt_scan_reserved_mem() takes no arguments as it operates on initial_boot_params, which is populated by early_init_dt_verify(). On RISC-V, early_init_dt_verify() is called twice. Once, directly, in setup_arch() if CONFIG_BUILTIN_DTB is not enabled and once indirectly, very early in the boot process, by parse_dtb() when it calls early_init_dt_scan_nodes(). 
This first call uses dtb_early_va to set initial_boot_params, which is not usable later in the boot process when early_init_fdt_scan_reserved_mem() is called. On arm64 for example, the corresponding call to early_init_dt_scan_nodes() uses fixmap addresses and doesn't suffer the same fate. Move early_init_fdt_scan_reserved_mem() further along the boot sequence, after the direct call to early_init_dt_verify() in setup_arch() so that the names use the correct virtual memory addresses. The above supposed that CONFIG_BUILTIN_DTB was not set, but should work equally in the case where it is - unflatten_and_copy_device_tree() also updates initial_boot_params. Reported-by: Valentina Fernandez Reported-by: Evgenii Shatokhin Link: https://lore.kernel.org/linux-riscv/f8e67f82-103d-156c-deb0-d6d6e2756f5e@microchip.com/ Fixes: 922b0375fc93 ("riscv: Fix memblock reservation for device tree blob") Signed-off-by: Conor Dooley Tested-by: Evgenii Shatokhin Link: https://lore.kernel.org/r/20221107151524.3941467-1-conor.dooley@microchip.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/setup.c | 1 + arch/riscv/mm/init.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 57e1ab036edf..8e78a8ab6a34 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -96,6 +96,7 @@ void __init setup_arch(char **cmdline_p) else pr_err("No DTB found in kernel mappings\n"); #endif + early_init_fdt_scan_reserved_mem(); misc_mem_init(); #ifdef CONFIG_SWIOTLB diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b6ab6a18dc1a..6c2f38aac544 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -208,7 +208,6 @@ void __init setup_bootmem(void) */ memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); - early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); memblock_allow_resize(); memblock_dump_all(); From
475fd3991a0d98e7b43b51e64b1eb92d0a885430 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 6 Nov 2022 15:53:54 +0100 Subject: [PATCH 0736/1477] arm64: efi: Fix handling of misaligned runtime regions and drop warning commit 9b9eaee9828fe98b030cf43ac50065a54a2f5d52 upstream. Currently, when mapping the EFI runtime regions in the EFI page tables, we complain about misaligned regions in a rather noisy way, using WARN(). Not only does this produce a lot of irrelevant clutter in the log, it is factually incorrect, as misaligned runtime regions are actually allowed by the EFI spec as long as they don't require conflicting memory types within the same 64k page. So let's drop the warning, and tweak the code so that we - take both the start and end of the region into account when checking for misalignment - only revert to RWX mappings for non-code regions if misaligned code regions are also known to exist. Cc: Acked-by: Linus Torvalds Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/efi.c | 52 +++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index fa02efb28e88..c5685179db5a 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -12,6 +12,14 @@ #include +static bool region_is_misaligned(const efi_memory_desc_t *md) +{ + if (PAGE_SIZE == EFI_PAGE_SIZE) + return false; + return !PAGE_ALIGNED(md->phys_addr) || + !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT); +} + /* * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be * executable, everything else can be mapped with the XN bits @@ -25,14 +33,22 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) if (type == EFI_MEMORY_MAPPED_IO) return PROT_DEVICE_nGnRE; - if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr), - "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?")) + if (region_is_misaligned(md)) { + static bool __initdata 
code_is_misaligned; + /* - * If the region is not aligned to the page size of the OS, we - * can not use strict permissions, since that would also affect - * the mapping attributes of the adjacent regions. + * Regions that are not aligned to the OS page size cannot be + * mapped with strict permissions, as those might interfere + * with the permissions that are needed by the adjacent + * region's mapping. However, if we haven't encountered any + * misaligned runtime code regions so far, we can safely use + * non-executable permissions for non-code regions. */ - return pgprot_val(PAGE_KERNEL_EXEC); + code_is_misaligned |= (type == EFI_RUNTIME_SERVICES_CODE); + + return code_is_misaligned ? pgprot_val(PAGE_KERNEL_EXEC) + : pgprot_val(PAGE_KERNEL); + } /* R-- */ if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) == @@ -62,19 +78,16 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE || md->type == EFI_RUNTIME_SERVICES_DATA); - if (!PAGE_ALIGNED(md->phys_addr) || - !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) { - /* - * If the end address of this region is not aligned to page - * size, the mapping is rounded up, and may end up sharing a - * page frame with the next UEFI memory region. If we create - * a block entry now, we may need to split it again when mapping - * the next region, and support for that is going to be removed - * from the MMU routines. So avoid block mappings altogether in - * that case. - */ + /* + * If this region is not aligned to the page size used by the OS, the + * mapping will be rounded outwards, and may end up sharing a page + * frame with an adjacent runtime memory region. Given that the page + * table descriptor covering the shared page will be rewritten when the + * adjacent region gets mapped, we must avoid block mappings here so we + * don't have to worry about splitting them when that happens. 
+ */ + if (region_is_misaligned(md)) page_mappings_only = true; - } create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, @@ -101,6 +114,9 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm, BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && md->type != EFI_RUNTIME_SERVICES_DATA); + if (region_is_misaligned(md)) + return 0; + /* * Calling apply_to_page_range() is only safe on regions that are * guaranteed to be mapped down to pages. Since we are only called From 4631cb040645eff4626d02a2a318863a3e53e2af Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 3 Nov 2022 15:10:53 +0000 Subject: [PATCH 0737/1477] MIPS: jump_label: Fix compat branch range check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 64ac0befe75bdfaffc396c2b4a0ed5ae6920eeee upstream. Cast upper bound of branch range to long to do signed compare, avoid negative offset trigger this warning. Fixes: 9b6584e35f40 ("MIPS: jump_label: Use compact branches for >= r6") Signed-off-by: Jiaxun Yang Cc: stable@vger.kernel.org Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/jump_label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index 662c8db9f45b..9f5b1247b4ba 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -56,7 +56,7 @@ void arch_jump_label_transform(struct jump_entry *e, * The branch offset must fit in the instruction's 26 * bit field. 
*/ - WARN_ON((offset >= BIT(25)) || + WARN_ON((offset >= (long)BIT(25)) || (offset < -(long)BIT(25))); insn.j_format.opcode = bc6_op; From b55e64d0a3a3341d0a416478fa367b74a98cf821 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 26 Oct 2022 12:42:03 -0700 Subject: [PATCH 0738/1477] mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI commit ebb5fd38f41132e6924cb33b647337f4a5d5360c upstream. Several SDHCI drivers need to deactivate command queueing in their reset hook (see sdhci_cqhci_reset() / sdhci-pci-core.c, for example), and several more are coming. Those reset implementations have some small subtleties (e.g., ordering of initialization of SDHCI vs. CQHCI might leave us resetting with a NULL ->cqe_private), and are often identical across different host drivers. We also don't want to force a dependency between SDHCI and CQHCI, or vice versa; non-SDHCI drivers use CQHCI, and SDHCI drivers might support command queueing through some other means. So, implement a small helper, to avoid repeating the same mistakes in different drivers. Simply stick it in a header, because it's so small it doesn't deserve its own module right now, and inlining to each driver is pretty reasonable. This is marked for -stable, as it is an important prerequisite patch for several SDHCI controller bugfixes that follow. 
Cc: Signed-off-by: Brian Norris Acked-by: Adrian Hunter Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221026124150.v4.1.Ie85faa09432bfe1b0890d8c24ff95e17f3097317@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-cqhci.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 drivers/mmc/host/sdhci-cqhci.h diff --git a/drivers/mmc/host/sdhci-cqhci.h b/drivers/mmc/host/sdhci-cqhci.h new file mode 100644 index 000000000000..cf8e7ba71bbd --- /dev/null +++ b/drivers/mmc/host/sdhci-cqhci.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022 The Chromium OS Authors + * + * Support that applies to the combination of SDHCI and CQHCI, while not + * expressing a dependency between the two modules. + */ + +#ifndef __MMC_HOST_SDHCI_CQHCI_H__ +#define __MMC_HOST_SDHCI_CQHCI_H__ + +#include "cqhci.h" +#include "sdhci.h" + +static inline void sdhci_and_cqhci_reset(struct sdhci_host *host, u8 mask) +{ + if ((host->mmc->caps2 & MMC_CAP2_CQE) && (mask & SDHCI_RESET_ALL) && + host->mmc->cqe_private) + cqhci_deactivate(host->mmc); + + sdhci_reset(host, mask); +} + +#endif /* __MMC_HOST_SDHCI_CQHCI_H__ */ From 3f558930add76be2a0e23991256c6a7eb46b4131 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 26 Oct 2022 12:42:04 -0700 Subject: [PATCH 0739/1477] mmc: sdhci-of-arasan: Fix SDHCI_RESET_ALL for CQHCI commit 5d249ac37fc2396e8acc1adb0650cdacae5a990d upstream. SDHCI_RESET_ALL resets will reset the hardware CQE state, but we aren't tracking that properly in software. When out of sync, we may trigger various timeouts. It's not typical to perform resets while CQE is enabled, but one particular case I hit commonly enough: mmc_suspend() -> mmc_power_off(). Typically we will eventually deactivate CQE (cqhci_suspend() -> cqhci_deactivate()), but that's not guaranteed -- in particular, if we perform a partial (e.g., interrupted) system suspend. 
The same bug was already found and fixed for two other drivers, in v5.7 and v5.9: 5cf583f1fb9c ("mmc: sdhci-msm: Deactivate CQE during SDHC reset") df57d73276b8 ("mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers") The latter is especially prescient, saying "other drivers using CQHCI might benefit from a similar change, if they also have CQHCI reset by SDHCI_RESET_ALL." So like these other patches, deactivate CQHCI when resetting the controller. Do this via the new sdhci_and_cqhci_reset() helper. This patch depends on (and should not compile without) the patch entitled "mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI". Fixes: 84362d79f436 ("mmc: sdhci-of-arasan: Add CQHCI support for arasan,sdhci-5.1") Cc: Signed-off-by: Brian Norris Reviewed-by: Guenter Roeck Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20221026124150.v4.2.I29f6a2189e84e35ad89c1833793dca9e36c64297@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-arasan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index fc38db64a6b4..9da49dc15248 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -25,6 +25,7 @@ #include #include "cqhci.h" +#include "sdhci-cqhci.h" #include "sdhci-pltfm.h" #define SDHCI_ARASAN_VENDOR_REGISTER 0x78 @@ -359,7 +360,7 @@ static void sdhci_arasan_reset(struct sdhci_host *host, u8 mask) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host); - sdhci_reset(host, mask); + sdhci_and_cqhci_reset(host, mask); if (sdhci_arasan->quirks & SDHCI_ARASAN_QUIRK_FORCE_CDTEST) { ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); From 0a8d4531a0d58fa4b118f657ee38024154d4385a Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 26 Oct 2022 12:42:08 -0700 Subject: [PATCH 0740/1477] mmc: sdhci_am654: 
Fix SDHCI_RESET_ALL for CQHCI commit 162503fd1c3a1d4e14dbe7f399c1d1bec1c8abbc upstream. [[ NOTE: this is completely untested by the author, but included solely because, as noted in commit df57d73276b8 ("mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers"), "other drivers using CQHCI might benefit from a similar change, if they also have CQHCI reset by SDHCI_RESET_ALL." We've now seen the same bug on at least MSM, Arasan, and Intel hardware. ]] SDHCI_RESET_ALL resets will reset the hardware CQE state, but we aren't tracking that properly in software. When out of sync, we may trigger various timeouts. It's not typical to perform resets while CQE is enabled, but this may occur in some suspend or error recovery scenarios. Include this fix by way of the new sdhci_and_cqhci_reset() helper. This patch depends on (and should not compile without) the patch entitled "mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI". Fixes: f545702b74f9 ("mmc: sdhci_am654: Add Support for Command Queuing Engine to J721E") Signed-off-by: Brian Norris Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221026124150.v4.6.I35ca9d6220ba48304438b992a76647ca8e5b126f@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci_am654.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index 7cab9d831afb..24cd6d3dc647 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -15,6 +15,7 @@ #include #include "cqhci.h" +#include "sdhci-cqhci.h" #include "sdhci-pltfm.h" /* CTL_CFG Registers */ @@ -378,7 +379,7 @@ static void sdhci_am654_reset(struct sdhci_host *host, u8 mask) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); - sdhci_reset(host, mask); + sdhci_and_cqhci_reset(host, mask); if (sdhci_am654->quirks & 
SDHCI_AM654_QUIRK_FORCE_CDTEST) { ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); @@ -464,7 +465,7 @@ static struct sdhci_ops sdhci_am654_ops = { .set_clock = sdhci_am654_set_clock, .write_b = sdhci_am654_write_b, .irq = sdhci_am654_cqhci_irq, - .reset = sdhci_reset, + .reset = sdhci_and_cqhci_reset, }; static const struct sdhci_pltfm_data sdhci_am654_pdata = { @@ -494,7 +495,7 @@ static struct sdhci_ops sdhci_j721e_8bit_ops = { .set_clock = sdhci_am654_set_clock, .write_b = sdhci_am654_write_b, .irq = sdhci_am654_cqhci_irq, - .reset = sdhci_reset, + .reset = sdhci_and_cqhci_reset, }; static const struct sdhci_pltfm_data sdhci_j721e_8bit_pdata = { From ea6787e482add0093b6049f9e29e6b05ea88e4ff Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 26 Oct 2022 12:42:07 -0700 Subject: [PATCH 0741/1477] mmc: sdhci-tegra: Fix SDHCI_RESET_ALL for CQHCI commit 836078449464e6af3b66ae6652dae79af176f21e upstream. [[ NOTE: this is completely untested by the author, but included solely because, as noted in commit df57d73276b8 ("mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers"), "other drivers using CQHCI might benefit from a similar change, if they also have CQHCI reset by SDHCI_RESET_ALL." We've now seen the same bug on at least MSM, Arasan, and Intel hardware. ]] SDHCI_RESET_ALL resets will reset the hardware CQE state, but we aren't tracking that properly in software. When out of sync, we may trigger various timeouts. It's not typical to perform resets while CQE is enabled, but this may occur in some suspend or error recovery scenarios. Include this fix by way of the new sdhci_and_cqhci_reset() helper. This patch depends on (and should not compile without) the patch entitled "mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI". 
Fixes: 3c4019f97978 ("mmc: tegra: HW Command Queue Support for Tegra SDMMC") Signed-off-by: Brian Norris Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221026124150.v4.5.I418c9eaaf754880fcd2698113e8c3ef821a944d7@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-tegra.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 67211fc42d24..d8fd2b5efd38 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -24,6 +24,7 @@ #include #include +#include "sdhci-cqhci.h" #include "sdhci-pltfm.h" #include "cqhci.h" @@ -361,7 +362,7 @@ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data; u32 misc_ctrl, clk_ctrl, pad_ctrl; - sdhci_reset(host, mask); + sdhci_and_cqhci_reset(host, mask); if (!(mask & SDHCI_RESET_ALL)) return; From 181cfff57bdca50850eb8f841c5b0338b5b6d30d Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 8 Nov 2022 16:47:46 +0800 Subject: [PATCH 0742/1477] ALSA: hda/hdmi - enable runtime pm for more AMD display audio commit fdcc4c22b7ab20e90b97f8bc6225d876b72b8f16 upstream. We are able to power down the GPU and audio via the GPU driver so flag these asics as supporting runtime pm. 
Signed-off-by: Evan Quan Cc: Link: https://lore.kernel.org/r/20221108084746.583058-1-evan.quan@amd.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 26dfa8558792..494bfd2135a9 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2749,6 +2749,9 @@ static const struct pci_device_id azx_ids[] = { { PCI_DEVICE(0x1002, 0xab28), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | AZX_DCAPS_PM_RUNTIME }, + { PCI_DEVICE(0x1002, 0xab30), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | + AZX_DCAPS_PM_RUNTIME }, { PCI_DEVICE(0x1002, 0xab38), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | AZX_DCAPS_PM_RUNTIME }, From 380d64168da41b17dc4faaf5c5e780793566ca09 Mon Sep 17 00:00:00 2001 From: Xian Wang Date: Fri, 4 Nov 2022 13:29:13 -0700 Subject: [PATCH 0743/1477] ALSA: hda/ca0132: add quirk for EVGA Z390 DARK commit 0c423e2ffa7edd3f8f9bcf17ce73fa9c7509b99e upstream. The Z390 DARK mainboard uses a CA0132 audio controller. The quirk is needed to enable surround sound and 3.5mm headphone jack handling in the front audio connector as well as in the rear of the board when in stereo mode. Page 97 of the linked manual contains instructions to setup the controller. 
Signed-off-by: Xian Wang Cc: stable@vger.kernel.org Link: https://www.evga.com/support/manuals/files/131-CS-E399.pdf Link: https://lore.kernel.org/r/20221104202913.13904-1-dev@xianwang.io Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_ca0132.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index f774b2ac9720..82f14c3f642b 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1272,6 +1272,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1458, 0xA026, "Gigabyte G1.Sniper Z97", QUIRK_R3DI), SND_PCI_QUIRK(0x1458, 0xA036, "Gigabyte GA-Z170X-Gaming 7", QUIRK_R3DI), SND_PCI_QUIRK(0x3842, 0x1038, "EVGA X99 Classified", QUIRK_R3DI), + SND_PCI_QUIRK(0x3842, 0x1055, "EVGA Z390 DARK", QUIRK_R3DI), SND_PCI_QUIRK(0x1102, 0x0013, "Recon3D", QUIRK_R3D), SND_PCI_QUIRK(0x1102, 0x0018, "Recon3D", QUIRK_R3D), SND_PCI_QUIRK(0x1102, 0x0051, "Sound Blaster AE-5", QUIRK_AE5), From 3a79f9568de08657fcdbc41d6fc4c0ca145a7a2b Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 10 Nov 2022 22:45:39 +0800 Subject: [PATCH 0744/1477] ALSA: hda: fix potential memleak in 'add_widget_node' commit 9a5523f72bd2b0d66eef3d58810c6eb7b5ffc143 upstream. 'kobject_add' may have allocated memory for 'kobject->name' even when it returns an error, and this function did not free the kobject when the 'kobject_add' call failed. So call 'kobject_put' to release the resources.
Signed-off-by: Ye Bin Cc: Link: https://lore.kernel.org/r/20221110144539.2989354-1-yebin@huaweicloud.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/hda/hdac_sysfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/hda/hdac_sysfs.c b/sound/hda/hdac_sysfs.c index e56e83325903..bcf302f5115a 100644 --- a/sound/hda/hdac_sysfs.c +++ b/sound/hda/hdac_sysfs.c @@ -346,8 +346,10 @@ static int add_widget_node(struct kobject *parent, hda_nid_t nid, return -ENOMEM; kobject_init(kobj, &widget_ktype); err = kobject_add(kobj, parent, "%02x", nid); - if (err < 0) + if (err < 0) { + kobject_put(kobj); return err; + } err = sysfs_create_group(kobj, group); if (err < 0) { kobject_put(kobj); From a414a6d6ef3ca3ba68a75d7d03b1e0e731e00aa4 Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Wed, 9 Nov 2022 13:17:32 -0400 Subject: [PATCH 0745/1477] ALSA: hda/realtek: Add Positivo C6300 model quirk commit 79e28f2ab3440e08f5fbf65648b008341c37b496 upstream. 
Positivo Master C6300 (1849:a233) requires a quirk for enabling headset-mic Signed-off-by: Edson Juliano Drosdeck Cc: Link: https://lore.kernel.org/r/20221109171732.5417-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 60e3bc124836..e3f6b930ad4a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9161,6 +9161,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), SND_PCI_QUIRK(0x1849, 0x1233, "ASRock NUC Box 1100", ALC233_FIXUP_NO_AUDIO_JACK), + SND_PCI_QUIRK(0x1849, 0xa233, "Positivo Master C6300", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS), SND_PCI_QUIRK(0x19e5, 0x320f, "Huawei WRT-WX9 ", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1b35, 0x1235, "CZC B20", ALC269_FIXUP_CZC_B20), From 2032c2d32b2afd030343cde491d23224421d5f4b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 8 Nov 2022 15:07:21 +0100 Subject: [PATCH 0746/1477] ALSA: usb-audio: Add quirk entry for M-Audio Micro commit 2f01a612d4758b45f775dbb88a49cf534ba47275 upstream. M-Audio Micro (0763:201a) defines the descriptor as vendor-specific, while the content seems class-compliant. Just overriding the probe makes the device work.
Reported-by: Ash Logan Cc: Link: https://lore.kernel.org/r/7ecd4417-d860-4773-c1c1-b07433342390@heyquark.com Link: https://lore.kernel.org/r/20221108140721.24248-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks-table.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index a51591f68ae6..6a78813b63f5 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2028,6 +2028,10 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, +{ + /* M-Audio Micro */ + USB_DEVICE_VENDOR_SPEC(0x0763, 0x201a), +}, { USB_DEVICE_VENDOR_SPEC(0x0763, 0x2030), .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { From f6896fb69d50909170a3812f71808b682c66d82b Mon Sep 17 00:00:00 2001 From: Jussi Laako Date: Wed, 9 Nov 2022 00:12:41 +0200 Subject: [PATCH 0747/1477] ALSA: usb-audio: Add DSD support for Accuphase DAC-60 commit 8cbd4725ffff3eface1f5f3397af02acad5b2831 upstream. Accuphase DAC-60 option card supports native DSD up to DSD256, but doesn't have support for auto-detection. Explicitly enable DSD support for the correct altsetting. 
Signed-off-by: Jussi Laako Cc: Link: https://lore.kernel.org/r/20221108221241.1220878-1-jussi@sonarnerd.net Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 04a691bc560c..752422147fb3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1744,6 +1744,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, /* XMOS based USB DACs */ switch (chip->usb_id) { case USB_ID(0x1511, 0x0037): /* AURALiC VEGA */ + case USB_ID(0x21ed, 0xd75a): /* Accuphase DAC-60 option card */ case USB_ID(0x2522, 0x0012): /* LH Labs VI DAC Infinity */ case USB_ID(0x2772, 0x0230): /* Pro-Ject Pre Box S2 Digital */ if (fp->altsetting == 2) From 516f9f23008b1e3033df7944f5d616b95076acd3 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 8 Nov 2022 10:49:34 -0700 Subject: [PATCH 0748/1477] vmlinux.lds.h: Fix placement of '.data..decrypted' section commit 000f8870a47bdc36730357883b6aef42bced91ee upstream. Commit d4c639990036 ("vmlinux.lds.h: Avoid orphan section with !SMP") fixed an orphan section warning by adding the '.data..decrypted' section to the linker script under the PERCPU_DECRYPTED_SECTION define but that placement introduced a panic with !SMP, as the percpu sections are not instantiated with that configuration so attempting to access variables defined with DEFINE_PER_CPU_DECRYPTED() will result in a page fault. Move the '.data..decrypted' section to the DATA_MAIN define so that the variables in it are properly instantiated at boot time with CONFIG_SMP=n. 
Cc: stable@vger.kernel.org Fixes: d4c639990036 ("vmlinux.lds.h: Avoid orphan section with !SMP") Link: https://lore.kernel.org/cbbd3548-880c-d2ca-1b67-5bb93b291d5f@huawei.com/ Debugged-by: Ard Biesheuvel Reported-by: Zhao Wenhui Tested-by: xiafukun Signed-off-by: Nathan Chancellor Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221108174934.3384275-1-nathan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a774361f28d4..d233f9e4b9c6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -328,6 +328,7 @@ #define DATA_DATA \ *(.xiptext) \ *(DATA_MAIN) \ + *(.data..decrypted) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ MEM_KEEP(init.data*) \ @@ -972,7 +973,6 @@ #ifdef CONFIG_AMD_MEM_ENCRYPT #define PERCPU_DECRYPTED_SECTION \ . = ALIGN(PAGE_SIZE); \ - *(.data..decrypted) \ *(.data..percpu..decrypted) \ . = ALIGN(PAGE_SIZE); #else From 344ddbd688d869aff2831e1345755d1b163000f4 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 7 Nov 2022 13:02:29 +0900 Subject: [PATCH 0749/1477] ata: libata-scsi: fix SYNCHRONIZE CACHE (16) command failure commit ea045fd344cb15c164e9ffc8b8cffb6883df8475 upstream. SAT SCSI/ATA Translation specification requires SCSI SYNCHRONIZE CACHE (10) and (16) commands both shall be translated to ATA flush command. Also, ZBC Zoned Block Commands specification mandates SYNCHRONIZE CACHE (16) command support. However, libata translates only SYNCHRONIZE CACHE (10). This results in SYNCHRONIZE CACHE (16) command failures on SATA drives and then libata translation does not conform to ZBC. To avoid the failure, add support for SYNCHRONIZE CACHE (16). Signed-off-by: Shin'ichiro Kawasaki Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a0e788b64821..459ece666c62 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3303,6 +3303,7 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf) case REPORT_LUNS: case REQUEST_SENSE: case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: case REZERO_UNIT: case SEEK_6: case SEEK_10: @@ -3969,6 +3970,7 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd) return ata_scsi_write_same_xlat; case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: if (ata_try_flush_cache(dev)) return ata_scsi_flush_xlat; break; @@ -4215,6 +4217,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) * turning this into a no-op. */ case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: fallthrough; /* no-op's, complete with success */ From 1d4ff73062096c21b47954d2996b4df259777bda Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 29 Oct 2022 13:49:12 +0900 Subject: [PATCH 0750/1477] nilfs2: fix deadlock in nilfs_count_free_blocks() commit 8ac932a4921a96ca52f61935dbba64ea87bbd5dc upstream. A semaphore deadlock can occur if nilfs_get_block() detects metadata corruption while locating data blocks and a superblock writeback occurs at the same time: task 1 task 2 ------ ------ * A file operation * nilfs_truncate() nilfs_get_block() down_read(rwsem A) <-- nilfs_bmap_lookup_contig() ... 
generic_shutdown_super() nilfs_put_super() * Prepare to write superblock * down_write(rwsem B) <-- nilfs_cleanup_super() * Detect b-tree corruption * nilfs_set_log_cursor() nilfs_bmap_convert_error() nilfs_count_free_blocks() __nilfs_error() down_read(rwsem A) <-- nilfs_set_error() down_write(rwsem B) <-- *** DEADLOCK *** Here, nilfs_get_block() readlocks rwsem A (= NILFS_MDT(dat_inode)->mi_sem) and then calls nilfs_bmap_lookup_contig(), but if it fails due to metadata corruption, __nilfs_error() is called from nilfs_bmap_convert_error() inside the lock section. Since __nilfs_error() calls nilfs_set_error() unless the filesystem is read-only and nilfs_set_error() attempts to writelock rwsem B (= nilfs->ns_sem) to write back superblock exclusively, hierarchical lock acquisition occurs in the order rwsem A -> rwsem B. Now, if another task starts updating the superblock, it may writelock rwsem B during the lock sequence above, and can deadlock trying to readlock rwsem A in nilfs_count_free_blocks(). However, there is actually no need to take rwsem A in nilfs_count_free_blocks() because it, within the lock section, only reads a single integer data on a shared struct with nilfs_sufile_get_ncleansegs(). This has been the case after commit aa474a220180 ("nilfs2: add local variable to cache the number of clean segments"), that is, even before this bug was introduced. So, this resolves the deadlock problem by just not taking the semaphore in nilfs_count_free_blocks(). 
Link: https://lkml.kernel.org/r/20221029044912.9139-1-konishi.ryusuke@gmail.com Fixes: e828949e5b42 ("nilfs2: call nilfs_error inside bmap routines") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+45d6ce7b7ad7ef455d03@syzkaller.appspotmail.com Tested-by: Ryusuke Konishi Cc: [2.6.38+ Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/the_nilfs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index c20ebecd7bc2..ce103dd39b89 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -690,9 +690,7 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) { unsigned long ncleansegs; - down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); - up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; return 0; } From 4feedde5486c07ea79787839153a71ca71329c7d Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 4 Nov 2022 23:29:59 +0900 Subject: [PATCH 0751/1477] nilfs2: fix use-after-free bug of ns_writer on remount commit 8cccf05fe857a18ee26e20d11a8455a73ffd4efd upstream. If a nilfs2 filesystem is downgraded to read-only due to metadata corruption on disk and is remounted read/write, or if emergency read-only remount is performed, detaching a log writer and synchronizing the filesystem can be done at the same time. 
In these cases, use-after-free of the log writer (hereinafter nilfs->ns_writer) can happen as shown in the scenario below: Task1 Task2 -------------------------------- ------------------------------ nilfs_construct_segment nilfs_segctor_sync init_wait init_waitqueue_entry add_wait_queue schedule nilfs_remount (R/W remount case) nilfs_attach_log_writer nilfs_detach_log_writer nilfs_segctor_destroy kfree finish_wait _raw_spin_lock_irqsave __raw_spin_lock_irqsave do_raw_spin_lock debug_spin_lock_before <-- use-after-free While Task1 is sleeping, nilfs->ns_writer is freed by Task2. After Task1 waked up, Task1 accesses nilfs->ns_writer which is already freed. This scenario diagram is based on the Shigeru Yoshida's post [1]. This patch fixes the issue by not detaching nilfs->ns_writer on remount so that this UAF race doesn't happen. Along with this change, this patch also inserts a few necessary read-only checks with superblock instance where only the ns_writer pointer was used to check if the filesystem is read-only. 
Link: https://syzkaller.appspot.com/bug?id=79a4c002e960419ca173d55e863bd09e8112df8b Link: https://lkml.kernel.org/r/20221103141759.1836312-1-syoshida@redhat.com [1] Link: https://lkml.kernel.org/r/20221104142959.28296-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+f816fa82f8783f7a02bb@syzkaller.appspotmail.com Reported-by: Shigeru Yoshida Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/segment.c | 15 ++++++++------- fs/nilfs2/super.c | 2 -- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 545f764d70b1..5ee4973525f0 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -322,7 +322,7 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb) struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_sc_info *sci = nilfs->ns_writer; - if (!sci || !sci->sc_flush_request) + if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request) return; set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); @@ -2248,7 +2248,7 @@ int nilfs_construct_segment(struct super_block *sb) struct nilfs_transaction_info *ti; int err; - if (!sci) + if (sb_rdonly(sb) || unlikely(!sci)) return -EROFS; /* A call inside transactions causes a deadlock. */ @@ -2287,7 +2287,7 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, struct nilfs_transaction_info ti; int err = 0; - if (!sci) + if (sb_rdonly(sb) || unlikely(!sci)) return -EROFS; nilfs_transaction_lock(sb, &ti, 0); @@ -2783,11 +2783,12 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) if (nilfs->ns_writer) { /* - * This happens if the filesystem was remounted - * read/write after nilfs_error degenerated it into a - * read-only mount. + * This happens if the filesystem is made read-only by + * __nilfs_error or nilfs_remount and then remounted + * read/write. In these cases, reuse the existing + * writer. 
*/ - nilfs_detach_log_writer(sb); + return 0; } nilfs->ns_writer = nilfs_segctor_new(sb, root); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index b9d30e8c43b0..7a41c9727c9e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1133,8 +1133,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out; if (*flags & SB_RDONLY) { - /* Shutting down log writer */ - nilfs_detach_log_writer(sb); sb->s_flags |= SB_RDONLY; /* From b5ee579fcb147c0939017eb78b6c75f7d79a6719 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 28 Oct 2022 16:50:26 +0100 Subject: [PATCH 0752/1477] drm/i915/dmabuf: fix sg_table handling in map_dma_buf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f90daa975911961b65070ec72bd7dd8d448f9ef7 upstream. We need to iterate over the original entries here for the sg_table, pulling out the struct page for each one, to be remapped. However currently this incorrectly iterates over the final dma mapped entries, which is likely just one gigantic sg entry if the iommu is enabled, leading to us only mapping the first struct page (and any physically contiguous pages following it), even if there is potentially lots more data to follow. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7306 Fixes: 1286ff739773 ("i915: add dmabuf/prime buffer sharing support.") Signed-off-by: Matthew Auld Cc: Lionel Landwerlin Cc: Tvrtko Ursulin Cc: Ville Syrjälä Cc: Michael J. Ruhl Cc: # v3.5+ Reviewed-by: Michael J. 
Ruhl Link: https://patchwork.freedesktop.org/patch/msgid/20221028155029.494736-1-matthew.auld@intel.com (cherry picked from commit 28d52f99bbca7227008cf580c9194c9b3516968e) Signed-off-by: Tvrtko Ursulin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 8dd295dbe241..dd35d3d7ad03 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -36,13 +36,13 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme goto err_unpin_pages; } - ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); + ret = sg_alloc_table(st, obj->mm.pages->orig_nents, GFP_KERNEL); if (ret) goto err_free; src = obj->mm.pages->sgl; dst = st->sgl; - for (i = 0; i < obj->mm.pages->nents; i++) { + for (i = 0; i < obj->mm.pages->orig_nents; i++) { sg_set_page(dst, sg_page(src), src->length, 0); dst = sg_next(dst); src = sg_next(src); From 8fa0c22ef8242f2fbe722f2f1ac39df6a95faa73 Mon Sep 17 00:00:00 2001 From: Jorge Lopez Date: Fri, 28 Oct 2022 10:55:27 -0500 Subject: [PATCH 0753/1477] platform/x86: hp_wmi: Fix rfkill causing soft blocked wifi commit 1598bfa8e1faa932de42e1ee7628a1c4c4263f0a upstream. After upgrading BIOS to U82 01.02.01 Rev.A, the console is flooded with the strange char "^@", which is printed out every second and makes login nearly impossible. Also the below messages were shown both in console and journal/dmesg every second: usb 1-3: Device not responding to setup address. usb 1-3: device not accepting address 4, error -71 usb 1-3: device descriptor read/all, error -71 usb usb1-port3: unable to enumerate USB device Wifi is soft blocked by checking rfkill. When unblocked manually, after a few seconds it would be soft blocked again. So I was suspecting something triggered rfkill to soft block wifi.
At the end it was fixed by removing hp_wmi module. The root cause is the way hp-wmi driver handles command 1B on post-2009 BIOS. In pre-2009 BIOS, command 1Bh return 0x4 to indicate that BIOS no longer controls the power for the wireless devices. Signed-off-by: Jorge Lopez Link: https://bugzilla.kernel.org/show_bug.cgi?id=216468 Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20221028155527.7724-1-jorge.lopez2@hp.com Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/hp-wmi.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 012639f6d335..519b2ab84a63 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -900,8 +900,16 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) wwan_rfkill = NULL; rfkill2_count = 0; - if (hp_wmi_rfkill_setup(device)) - hp_wmi_rfkill2_setup(device); + /* + * In pre-2009 BIOS, command 1Bh return 0x4 to indicate that + * BIOS no longer controls the power for the wireless + * devices. All features supported by this command will no + * longer be supported. + */ + if (!hp_wmi_bios_2009_later()) { + if (hp_wmi_rfkill_setup(device)) + hp_wmi_rfkill2_setup(device); + } thermal_policy_setup(device); From 9c0accfa5a358b89afabd4d4e82ca6eaca06e72c Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 1 Nov 2022 10:53:54 +0800 Subject: [PATCH 0754/1477] btrfs: selftests: fix wrong error check in btrfs_free_dummy_root() commit 9b2f20344d450137d015b380ff0c2e2a6a170135 upstream. 
The btrfs_alloc_dummy_root() uses ERR_PTR as the error return value rather than NULL, if error happened, there will be a NULL pointer dereference: BUG: KASAN: null-ptr-deref in btrfs_free_dummy_root+0x21/0x50 [btrfs] Read of size 8 at addr 000000000000002c by task insmod/258926 CPU: 2 PID: 258926 Comm: insmod Tainted: G W 6.1.0-rc2+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 Call Trace: dump_stack_lvl+0x34/0x44 kasan_report+0xb7/0x140 kasan_check_range+0x145/0x1a0 btrfs_free_dummy_root+0x21/0x50 [btrfs] btrfs_test_free_space_cache+0x1a8c/0x1add [btrfs] btrfs_run_sanity_tests+0x65/0x80 [btrfs] init_btrfs_fs+0xec/0x154 [btrfs] do_one_initcall+0x87/0x2a0 do_init_module+0xdf/0x320 load_module+0x3006/0x3390 __do_sys_finit_module+0x113/0x1b0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: aaedb55bc08f ("Btrfs: add tests for btrfs_get_extent") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Anand Jain Signed-off-by: Zhang Xiaoxu Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tests/btrfs-tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 999c14e5d0bd..0599566c66b0 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -192,7 +192,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) void btrfs_free_dummy_root(struct btrfs_root *root) { - if (!root) + if (IS_ERR_OR_NULL(root)) return; /* Will be freed by btrfs_free_fs_roots */ if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state))) From 4ea3aa3b983b18863719960bb03568fc7eff29a7 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 26 Oct 2022 12:42:06 -0700 Subject: [PATCH 0755/1477] mms: sdhci-esdhc-imx: Fix SDHCI_RESET_ALL for CQHCI commit fb1dec44c6750bb414f47b929c8c175a1a127c31 upstream. 
[[ NOTE: this is completely untested by the author, but included solely because, as noted in commit df57d73276b8 ("mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers"), "other drivers using CQHCI might benefit from a similar change, if they also have CQHCI reset by SDHCI_RESET_ALL." We've now seen the same bug on at least MSM, Arasan, and Intel hardware. ]] SDHCI_RESET_ALL resets will reset the hardware CQE state, but we aren't tracking that properly in software. When out of sync, we may trigger various timeouts. It's not typical to perform resets while CQE is enabled, but this may occur in some suspend or error recovery scenarios. Include this fix by way of the new sdhci_and_cqhci_reset() helper. This patch depends on (and should not compile without) the patch entitled "mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI". Fixes: bb6e358169bf ("mmc: sdhci-esdhc-imx: add CMDQ support") Signed-off-by: Brian Norris Reviewed-by: Haibo Chen Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221026124150.v4.4.I7d01f9ad11bacdc9213dee61b7918982aea39115@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index be4e5cdda1fa..de42b5473f97 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -26,6 +26,7 @@ #include #include #include +#include "sdhci-cqhci.h" #include "sdhci-pltfm.h" #include "sdhci-esdhc.h" #include "cqhci.h" @@ -1243,7 +1244,7 @@ static void esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned timing) static void esdhc_reset(struct sdhci_host *host, u8 mask) { - sdhci_reset(host, mask); + sdhci_and_cqhci_reset(host, mask); sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); From 
03f9582a6a2ebd25a440896475c968428c4b63e7 Mon Sep 17 00:00:00 2001 From: ZhangPeng Date: Wed, 9 Nov 2022 01:35:42 +0000 Subject: [PATCH 0756/1477] udf: Fix a slab-out-of-bounds write bug in udf_find_entry() commit c8af247de385ce49afabc3bf1cf4fd455c94bfe8 upstream. Syzbot reported a slab-out-of-bounds Write bug: loop0: detected capacity change from 0 to 2048 ================================================================== BUG: KASAN: slab-out-of-bounds in udf_find_entry+0x8a5/0x14f0 fs/udf/namei.c:253 Write of size 105 at addr ffff8880123ff896 by task syz-executor323/3610 CPU: 0 PID: 3610 Comm: syz-executor323 Not tainted 6.1.0-rc2-syzkaller-00105-gb229b6ca5abb #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 10/11/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1b1/0x28e lib/dump_stack.c:106 print_address_description+0x74/0x340 mm/kasan/report.c:284 print_report+0x107/0x1f0 mm/kasan/report.c:395 kasan_report+0xcd/0x100 mm/kasan/report.c:495 kasan_check_range+0x2a7/0x2e0 mm/kasan/generic.c:189 memcpy+0x3c/0x60 mm/kasan/shadow.c:66 udf_find_entry+0x8a5/0x14f0 fs/udf/namei.c:253 udf_lookup+0xef/0x340 fs/udf/namei.c:309 lookup_open fs/namei.c:3391 [inline] open_last_lookups fs/namei.c:3481 [inline] path_openat+0x10e6/0x2df0 fs/namei.c:3710 do_filp_open+0x264/0x4f0 fs/namei.c:3740 do_sys_openat2+0x124/0x4e0 fs/open.c:1310 do_sys_open fs/open.c:1326 [inline] __do_sys_creat fs/open.c:1402 [inline] __se_sys_creat fs/open.c:1396 [inline] __x64_sys_creat+0x11f/0x160 fs/open.c:1396 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7ffab0d164d9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffe1a7e6bb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000055 RAX: 
ffffffffffffffda RBX: 0000000000000000 RCX: 00007ffab0d164d9 RDX: 00007ffab0d164d9 RSI: 0000000000000000 RDI: 0000000020000180 RBP: 00007ffab0cd5a10 R08: 0000000000000000 R09: 0000000000000000 R10: 00005555573552c0 R11: 0000000000000246 R12: 00007ffab0cd5aa0 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 3610: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x3d/0x60 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:371 [inline] __kasan_kmalloc+0x97/0xb0 mm/kasan/common.c:380 kmalloc include/linux/slab.h:576 [inline] udf_find_entry+0x7b6/0x14f0 fs/udf/namei.c:243 udf_lookup+0xef/0x340 fs/udf/namei.c:309 lookup_open fs/namei.c:3391 [inline] open_last_lookups fs/namei.c:3481 [inline] path_openat+0x10e6/0x2df0 fs/namei.c:3710 do_filp_open+0x264/0x4f0 fs/namei.c:3740 do_sys_openat2+0x124/0x4e0 fs/open.c:1310 do_sys_open fs/open.c:1326 [inline] __do_sys_creat fs/open.c:1402 [inline] __se_sys_creat fs/open.c:1396 [inline] __x64_sys_creat+0x11f/0x160 fs/open.c:1396 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The buggy address belongs to the object at ffff8880123ff800 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 150 bytes inside of 256-byte region [ffff8880123ff800, ffff8880123ff900) The buggy address belongs to the physical page: page:ffffea000048ff80 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x123fe head:ffffea000048ff80 order:1 compound_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 ffffea00004b8500 dead000000000003 ffff888012041b40 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x0(), pid 1, tgid 1 (swapper/0), ts 
1841222404, free_ts 0 create_dummy_stack mm/page_owner.c:67 [inline] register_early_stack+0x77/0xd0 mm/page_owner.c:83 init_page_owner+0x3a/0x731 mm/page_owner.c:93 kernel_init_freeable+0x41c/0x5d5 init/main.c:1629 kernel_init+0x19/0x2b0 init/main.c:1519 page_owner free stack trace missing Memory state around the buggy address: ffff8880123ff780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8880123ff800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff8880123ff880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 06 ^ ffff8880123ff900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8880123ff980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Fix this by changing the memory size allocated for copy_name from UDF_NAME_LEN(254) to UDF_NAME_LEN_CS0(255), because the total length (lfi) of subsequent memcpy can be up to 255. CC: stable@vger.kernel.org Reported-by: syzbot+69c9fdccc6dd08961d34@syzkaller.appspotmail.com Fixes: 066b9cded00b ("udf: Use separate buffer for copying split names") Signed-off-by: ZhangPeng Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20221109013542.442790-1-zhangpeng362@huawei.com Signed-off-by: Greg Kroah-Hartman --- fs/udf/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 9f3aced46c68..aff5ca32e4f6 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -241,7 +241,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, poffset - lfi); else { if (!copy_name) { - copy_name = kmalloc(UDF_NAME_LEN, + copy_name = kmalloc(UDF_NAME_LEN_CS0, GFP_NOFS); if (!copy_name) { fi = ERR_PTR(-ENOMEM); From 0b692d41ee5c88097ecf5dbb37c59083044c996a Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Wed, 2 Nov 2022 11:07:28 -0500 Subject: [PATCH 0757/1477] mm/memremap.c: map FS_DAX device memory as decrypted commit 867400af90f1f953ff9e10b1b87ecaf9369a7eb8 upstream. 
virtio_pmem use devm_memremap_pages() to map the device memory. By default this memory is mapped as encrypted with SEV. Guest reboot changes the current encryption key and guest no longer properly decrypts the FSDAX device meta data. Mark the corresponding device memory region for FSDAX devices (mapped with memremap_pages) as decrypted to retain the persistent memory property. Link: https://lkml.kernel.org/r/20221102160728.3184016-1-pankaj.gupta@amd.com Fixes: b7b3c01b19159 ("mm/memremap_pages: support multiple ranges per invocation") Signed-off-by: Pankaj Gupta Cc: Dan Williams Cc: Tom Lendacky Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memremap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memremap.c b/mm/memremap.c index 2455bac89506..299aad0d26e5 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -348,6 +348,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) WARN(1, "File system DAX not supported\n"); return ERR_PTR(-EINVAL); } + params.pgprot = pgprot_decrypted(params.pgprot); break; case MEMORY_DEVICE_GENERIC: break; From f8e0edeaa0f2b860bdbbf0aafb4492533043d650 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 4 Nov 2022 08:50:00 +0100 Subject: [PATCH 0758/1477] can: j1939: j1939_send_one(): fix missing CAN header initialization commit 3eb3d283e8579a22b81dd2ac3987b77465b2a22f upstream. The read access to struct canxl_frame::len inside of a j1939 created skbuff revealed a missing initialization of reserved and later filled elements in struct can_frame. This patch initializes the 8 byte CAN header with zero. 
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Cc: Oleksij Rempel Link: https://lore.kernel.org/linux-can/20221104052235.GA6474@pengutronix.de Reported-by: syzbot+d168ec0caca4697e03b1@syzkaller.appspotmail.com Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20221104075000.105414-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/j1939/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index ca75d1b8f415..9da8fbc81c04 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -332,6 +332,9 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb) /* re-claim the CAN_HDR from the SKB */ cf = skb_push(skb, J1939_CAN_HDR); + /* initialize header structure */ + memset(cf, 0, J1939_CAN_HDR); + /* make it a full can frame again */ skb_put(skb, J1939_CAN_FTR + (8 - dlc)); From d37dfb9357e935c8701e80429f5f7c416fb946a3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jun 2022 13:18:39 -0700 Subject: [PATCH 0759/1477] cert host tools: Stop complaining about deprecated OpenSSL functions commit 6bfb56e93bcef41859c2d5ab234ffd80b691be35 upstream. OpenSSL 3.0 deprecated the OpenSSL's ENGINE API. That is as may be, but the kernel build host tools still use it. Disable the warning about deprecated declarations until somebody who cares fixes it. Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- scripts/extract-cert.c | 7 +++++++ scripts/sign-file.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c index 3bc48c726c41..79ecbbfe37cd 100644 --- a/scripts/extract-cert.c +++ b/scripts/extract-cert.c @@ -23,6 +23,13 @@ #include #include +/* + * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. 
+ * + * Remove this if/when that API is no longer used + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #define PKEY_ID_PKCS7 2 static __attribute__((noreturn)) diff --git a/scripts/sign-file.c b/scripts/sign-file.c index fbd34b8e8f57..7434e9ea926e 100644 --- a/scripts/sign-file.c +++ b/scripts/sign-file.c @@ -29,6 +29,13 @@ #include #include +/* + * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. + * + * Remove this if/when that API is no longer used + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + /* * Use CMS if we have openssl-1.0.0 or newer available - otherwise we have to * assume that it's not available and its header file is missing and that we From 4b51cce72ab7d173fd511917396c832b912b7634 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:35 +0300 Subject: [PATCH 0760/1477] dmaengine: at_hdmac: Fix at_lli struct definition commit f1171bbdd2ba2a50ee64bb198a78c268a5baf5f1 upstream. Those hardware registers are all of 32 bits, while dma_addr_t can be of type u64 or u32 depending on CONFIG_ARCH_DMA_ADDR_T_64BIT. Force u32 to comply with what the hardware expects.
Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-2-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac_regs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 80fc2fe8c77e..8dc82c7b1dcf 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -164,13 +164,13 @@ /* LLI == Linked List Item; aka DMA buffer descriptor */ struct at_lli { /* values that are not changed by hardware */ - dma_addr_t saddr; - dma_addr_t daddr; + u32 saddr; + u32 daddr; /* value that may get written back: */ - u32 ctrla; + u32 ctrla; /* more values that are not changed by hardware */ - u32 ctrlb; - dma_addr_t dscr; /* chain to next lli */ + u32 ctrlb; + u32 dscr; /* chain to next lli */ }; /** From 24f9e93e506aa366172385ef2e367add05de422b Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:36 +0300 Subject: [PATCH 0761/1477] dmaengine: at_hdmac: Don't start transactions at tx_submit level commit 7176a6a8982d311e50a7c1168868d26e65bbba19 upstream. tx_submit is supposed to push the current transaction descriptor to a pending queue, waiting for issue_pending() to be called. issue_pending() must start the transfer, not tx_submit(), thus remove atc_dostart() from atc_tx_submit(). Clients of at_xdmac that assume that tx_submit() starts the transfer must be updated and call dma_async_issue_pending() if they miss to call it. The vdbg print was moved to after the lock is released. It is desirable to do the prints without the lock held if possible, and because the if statement disappears there's no reason why to do the print while holding the lock. 
Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-3-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 7eaee5b705b1..1b0f753a4ba2 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -672,19 +672,11 @@ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&atchan->lock, flags); cookie = dma_cookie_assign(tx); - if (list_empty(&atchan->active_list)) { - dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n", - desc->txd.cookie); - atc_dostart(atchan, desc); - list_add_tail(&desc->desc_node, &atchan->active_list); - } else { - dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", - desc->txd.cookie); - list_add_tail(&desc->desc_node, &atchan->queue); - } - + list_add_tail(&desc->desc_node, &atchan->queue); spin_unlock_irqrestore(&atchan->lock, flags); + dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", + desc->txd.cookie); return cookie; } From ad4cbe8e9c3a2e0b3c5eacd1f74ea6835835f38d Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:37 +0300 Subject: [PATCH 0762/1477] dmaengine: at_hdmac: Start transfer for cyclic channels in issue_pending commit 8a47221fc28417ff8a32a4f92d4448a56c3cf7e1 upstream. Cyclic channels must too call issue_pending in order to start a transfer. Start the transfer in issue_pending regardless of the type of channel. 
This wrongly worked before, because in the past the transfer was started at tx_submit level when there was only one desc in the transfer list. Fixes: 53830cc75974 ("dmaengine: at_hdmac: add cyclic DMA operation support") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-4-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 1b0f753a4ba2..bdbf7a65450b 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1509,10 +1509,6 @@ static void atc_issue_pending(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "issue_pending\n"); - /* Not needed for cyclic transfers */ - if (atc_chan_is_cyclic(atchan)) - return; - atc_advance_work(atchan); } From 7078e935b4102a63a2473dbeeeb13dd9fefc33fb Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:38 +0300 Subject: [PATCH 0763/1477] dmaengine: at_hdmac: Fix premature completion of desc in issue_pending commit fcd37565efdaffeac179d0f0ce980ac79bfdf569 upstream. Multiple calls to atc_issue_pending() could result in a premature completion of a descriptor from the atchan->active list, as the method always completed the first active descriptor from the list. Instead, issue_pending() should just take the first transaction descriptor from the pending queue, move it to active_list and start the transfer.
Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-5-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index bdbf7a65450b..d441c3ffcd63 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1500,16 +1500,26 @@ atc_tx_status(struct dma_chan *chan, } /** - * atc_issue_pending - try to finish work + * atc_issue_pending - takes the first transaction descriptor in the pending + * queue and starts the transfer. * @chan: target DMA channel */ static void atc_issue_pending(struct dma_chan *chan) { - struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_desc *desc; + unsigned long flags; dev_vdbg(chan2dev(chan), "issue_pending\n"); - atc_advance_work(atchan); + spin_lock_irqsave(&atchan->lock, flags); + if (atc_chan_is_enabled(atchan) || list_empty(&atchan->queue)) + return spin_unlock_irqrestore(&atchan->lock, flags); + + desc = atc_first_queued(atchan); + list_move_tail(&desc->desc_node, &atchan->active_list); + atc_dostart(atchan, desc); + spin_unlock_irqrestore(&atchan->lock, flags); } /** From ee356822618ee86cfab8fac1bec0bf66fb98ea2e Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:39 +0300 Subject: [PATCH 0764/1477] dmaengine: at_hdmac: Do not call the complete callback on device_terminate_all commit f645f85ae1104f8bd882f962ac0a69a1070076dd upstream. The method was wrong because it violated the dmaengine API. 
For aborted transfers the complete callback should not be called. Fix the behavior and do not call the complete callback on device_terminate_all. Fixes: 808347f6a317 ("dmaengine: at_hdmac: add DMA slave transfers") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-6-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index d441c3ffcd63..69d470ea7f13 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1410,11 +1410,8 @@ static int atc_terminate_all(struct dma_chan *chan) struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); int chan_id = atchan->chan_common.chan_id; - struct at_desc *desc, *_desc; unsigned long flags; - LIST_HEAD(list); - dev_vdbg(chan2dev(chan), "%s\n", __func__); /* @@ -1433,15 +1430,11 @@ static int atc_terminate_all(struct dma_chan *chan) cpu_relax(); /* active_list entries will end up before queued entries */ - list_splice_init(&atchan->queue, &list); - list_splice_init(&atchan->active_list, &list); + list_splice_tail_init(&atchan->queue, &atchan->free_list); + list_splice_tail_init(&atchan->active_list, &atchan->free_list); spin_unlock_irqrestore(&atchan->lock, flags); - /* Flush all pending and queued descriptors */ - list_for_each_entry_safe(desc, _desc, &list, desc_node) - atc_chain_complete(atchan, desc); - clear_bit(ATC_IS_PAUSED, &atchan->status); /* if channel dedicated to cyclic operations, free it */ clear_bit(ATC_IS_CYCLIC, &atchan->status); From 9b69060a725dbc8ee3f3d270279d6b2d9e39079b Mon Sep 17 00:00:00 2001 
From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:40 +0300 Subject: [PATCH 0765/1477] dmaengine: at_hdmac: Protect atchan->status with the channel lock commit 6e5ad28d16f082efeae3d0bd2e31f24bed218019 upstream. Now that the complete callback call was removed from device_terminate_all(), we can protect the atchan->status with the channel lock. The atomic bitops on atchan->status do not substitute proper locking on the status, as one could still modify the status after the lock was dropped in atc_terminate_all() but before the atomic bitops were executed. Fixes: 078a6506141a ("dmaengine: at_hdmac: Fix deadlocks") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-7-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 69d470ea7f13..99d6bb5634df 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1433,12 +1433,12 @@ static int atc_terminate_all(struct dma_chan *chan) list_splice_tail_init(&atchan->queue, &atchan->free_list); list_splice_tail_init(&atchan->active_list, &atchan->free_list); - spin_unlock_irqrestore(&atchan->lock, flags); - clear_bit(ATC_IS_PAUSED, &atchan->status); /* if channel dedicated to cyclic operations, free it */ clear_bit(ATC_IS_CYCLIC, &atchan->status); + spin_unlock_irqrestore(&atchan->lock, flags); + return 0; } From 1582cc3b480501b6144c724306d2c8d8dd4b84e3 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:41 +0300 Subject: [PATCH 0766/1477] dmaengine: at_hdmac: Fix concurrency problems by removing atc_complete_all() commit 
c6babed879fbe82796a601bf097649e07382db46 upstream. atc_complete_all() had concurrency bugs, thus remove it: 1/ atc_complete_all() in its entirety was buggy, as when the atchan->queue list (the one that contains descriptors that are not yet issued to the hardware) contained descriptors, it fired just the first from the atchan->queue, but moved all the desc from atchan->queue to atchan->active_list and considered them all as fired. This could result in calling the completion of a descriptor that was not yet issued to the hardware. 2/ when in tasklet at atc_advance_work() time, atchan->active_list was queried without holding the lock of the chan. This can result in atchan->active_list concurrency problems between the tasklet and issue_pending(). Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-8-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 49 ++++-------------------------------------- 1 file changed, 4 insertions(+), 45 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 99d6bb5634df..3f3b8c040499 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -466,42 +466,6 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) dma_run_dependencies(txd); } -/** - * atc_complete_all - finish work for all transactions - * @atchan: channel to complete transactions for - * - * Eventually submit queued descriptors if any - * - * Assume channel is idle while calling this function - * Called with atchan->lock held and bh disabled - */ -static void atc_complete_all(struct at_dma_chan *atchan) -{ 
- struct at_desc *desc, *_desc; - LIST_HEAD(list); - unsigned long flags; - - dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n"); - - spin_lock_irqsave(&atchan->lock, flags); - - /* - * Submit queued descriptors ASAP, i.e. before we go through - * the completed ones. - */ - if (!list_empty(&atchan->queue)) - atc_dostart(atchan, atc_first_queued(atchan)); - /* empty active_list now it is completed */ - list_splice_init(&atchan->active_list, &list); - /* empty queue list by moving descriptors (if any) to active_list */ - list_splice_init(&atchan->queue, &atchan->active_list); - - spin_unlock_irqrestore(&atchan->lock, flags); - - list_for_each_entry_safe(desc, _desc, &list, desc_node) - atc_chain_complete(atchan, desc); -} - /** * atc_advance_work - at the end of a transaction, move forward * @atchan: channel where the transaction ended @@ -509,25 +473,20 @@ static void atc_complete_all(struct at_dma_chan *atchan) static void atc_advance_work(struct at_dma_chan *atchan) { unsigned long flags; - int ret; dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); spin_lock_irqsave(&atchan->lock, flags); - ret = atc_chan_is_enabled(atchan); + if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list)) + return spin_unlock_irqrestore(&atchan->lock, flags); spin_unlock_irqrestore(&atchan->lock, flags); - if (ret) - return; - - if (list_empty(&atchan->active_list) || - list_is_singular(&atchan->active_list)) - return atc_complete_all(atchan); atc_chain_complete(atchan, atc_first_active(atchan)); /* advance work */ spin_lock_irqsave(&atchan->lock, flags); - atc_dostart(atchan, atc_first_active(atchan)); + if (!list_empty(&atchan->active_list)) + atc_dostart(atchan, atc_first_active(atchan)); spin_unlock_irqrestore(&atchan->lock, flags); } From 7f07cecc7411d8f634fd23766368c92264fc2d73 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:42 +0300 Subject: [PATCH 0767/1477] dmaengine: at_hdmac: Fix concurrency over descriptor commit 
06988949df8c3007ad82036d3606d8ae72ed9000 upstream. The descriptor was added to the free_list before calling the callback, which could result in reissuing of the same descriptor and calling of a single callback for both. Move the descriptor to the free list after the callback is invoked. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-9-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 3f3b8c040499..01dfc8dbfc83 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -450,11 +450,8 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) desc->memset_buffer = false; } - /* move children to free_list */ - list_splice_init(&desc->tx_list, &atchan->free_list); - /* move myself to free_list */ - list_move(&desc->desc_node, &atchan->free_list); - + /* Remove transfer node from the active list.
*/ + list_del_init(&desc->desc_node); spin_unlock_irqrestore(&atchan->lock, flags); dma_descriptor_unmap(txd); @@ -464,6 +461,13 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) dmaengine_desc_get_callback_invoke(txd, NULL); dma_run_dependencies(txd); + + spin_lock_irqsave(&atchan->lock, flags); + /* move children to free_list */ + list_splice_init(&desc->tx_list, &atchan->free_list); + /* add myself to free_list */ + list_add(&desc->desc_node, &atchan->free_list); + spin_unlock_irqrestore(&atchan->lock, flags); } /** From 0f603bf553a70af22ced122d706feeae2bb6ed76 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:43 +0300 Subject: [PATCH 0768/1477] dmaengine: at_hdmac: Free the memset buf without holding the chan lock commit 6ba826cbb57d675f447b59323204d1473bbd5593 upstream. There's no need to hold the channel lock when freeing the memset buf, as the operation has already completed. Free the memset buf without holding the channel lock. Fixes: 4d112426c344 ("dmaengine: hdmac: Add memset capabilities") Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-10-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 01dfc8dbfc83..61e1c555cede 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -443,13 +443,6 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) if (!atc_chan_is_cyclic(atchan)) dma_cookie_complete(txd); - /* If the transfer was a memset, free our temporary buffer */ - if (desc->memset_buffer) { - dma_pool_free(atdma->memset_pool, desc->memset_vaddr, - desc->memset_paddr); - desc->memset_buffer = false; - } - /* Remove 
transfer node from the active list. */ list_del_init(&desc->desc_node); spin_unlock_irqrestore(&atchan->lock, flags); @@ -468,6 +461,13 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) /* add myself to free_list */ list_add(&desc->desc_node, &atchan->free_list); spin_unlock_irqrestore(&atchan->lock, flags); + + /* If the transfer was a memset, free our temporary buffer */ + if (desc->memset_buffer) { + dma_pool_free(atdma->memset_pool, desc->memset_vaddr, + desc->memset_paddr); + desc->memset_buffer = false; + } } /** From a35dd5dd98b67b98f57e55efb4374e55c2cb680f Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:44 +0300 Subject: [PATCH 0769/1477] dmaengine: at_hdmac: Fix concurrency over the active list commit 03ed9ba357cc78116164b90b87f45eacab60b561 upstream. The tasklet (atc_advance_work()) did not hold the channel lock when retrieving the first active descriptor, causing concurrency problems if issue_pending() was called in between. If issue_pending() was called exactly after the lock was released in the tasklet (atc_advance_work()), atc_chain_complete() could complete a descriptor for which the controller has not yet raised an interrupt.
Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-11-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 61e1c555cede..b5cff318461d 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -443,8 +443,6 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) if (!atc_chan_is_cyclic(atchan)) dma_cookie_complete(txd); - /* Remove transfer node from the active list. */ - list_del_init(&desc->desc_node); spin_unlock_irqrestore(&atchan->lock, flags); dma_descriptor_unmap(txd); @@ -476,6 +474,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) */ static void atc_advance_work(struct at_dma_chan *atchan) { + struct at_desc *desc; unsigned long flags; dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); @@ -483,9 +482,12 @@ static void atc_advance_work(struct at_dma_chan *atchan) spin_lock_irqsave(&atchan->lock, flags); if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list)) return spin_unlock_irqrestore(&atchan->lock, flags); - spin_unlock_irqrestore(&atchan->lock, flags); - atc_chain_complete(atchan, atc_first_active(atchan)); + desc = atc_first_active(atchan); + /* Remove the transfer node from the active list. 
*/ + list_del_init(&desc->desc_node); + spin_unlock_irqrestore(&atchan->lock, flags); + atc_chain_complete(atchan, desc); /* advance work */ spin_lock_irqsave(&atchan->lock, flags); From 6be4ab08c8637bcea930ee399bd1001d2250b168 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:45 +0300 Subject: [PATCH 0770/1477] dmaengine: at_hdmac: Fix descriptor handling when issuing it to hardware commit ba2423633ba646e1df20e30cb3cf35495c16f173 upstream. As it was before, the descriptor was issued to the hardware without adding it to the active (issued) list. This could result in a completion of other descriptor, or/and in the descriptor never being completed. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-12-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index b5cff318461d..d4f92c8befc1 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -491,8 +491,11 @@ static void atc_advance_work(struct at_dma_chan *atchan) /* advance work */ spin_lock_irqsave(&atchan->lock, flags); - if (!list_empty(&atchan->active_list)) - atc_dostart(atchan, atc_first_active(atchan)); + if (!list_empty(&atchan->active_list)) { + desc = atc_first_queued(atchan); + list_move_tail(&desc->desc_node, &atchan->active_list); + atc_dostart(atchan, desc); + } spin_unlock_irqrestore(&atchan->lock, flags); } @@ -504,6 +507,7 @@ static void atc_advance_work(struct at_dma_chan *atchan) static void atc_handle_error(struct 
at_dma_chan *atchan) { struct at_desc *bad_desc; + struct at_desc *desc; struct at_desc *child; unsigned long flags; @@ -521,8 +525,11 @@ static void atc_handle_error(struct at_dma_chan *atchan) list_splice_init(&atchan->queue, atchan->active_list.prev); /* Try to restart the controller */ - if (!list_empty(&atchan->active_list)) - atc_dostart(atchan, atc_first_active(atchan)); + if (!list_empty(&atchan->active_list)) { + desc = atc_first_queued(atchan); + list_move_tail(&desc->desc_node, &atchan->active_list); + atc_dostart(atchan, desc); + } /* * KERN_CRITICAL may seem harsh, but since this only happens From f4512855223c37aae610f667aed85fdfc701fc4f Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:46 +0300 Subject: [PATCH 0771/1477] dmaengine: at_hdmac: Fix completion of unissued descriptor in case of errors commit ef2cb4f0ce479f77607b04c4b0414bf32f863ee8 upstream. In case the controller detected an error, the code took the chance to move all the queued (submitted) descriptors to the active (issued) list. This was wrong as if there were any descriptors in the submitted list they were moved to the issued list without actually issuing them to the controller, thus a completion could be raised without even firing the descriptor.
Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-13-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index d4f92c8befc1..2740c78649db 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -520,10 +520,6 @@ static void atc_handle_error(struct at_dma_chan *atchan) bad_desc = atc_first_active(atchan); list_del_init(&bad_desc->desc_node); - /* As we are stopped, take advantage to push queued descriptors - * in active_list */ - list_splice_init(&atchan->queue, atchan->active_list.prev); - /* Try to restart the controller */ if (!list_empty(&atchan->active_list)) { desc = atc_first_queued(atchan); From f53a233eaad61ddb7622bc279cf1145b022cff46 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:47 +0300 Subject: [PATCH 0772/1477] dmaengine: at_hdmac: Don't allow CPU to reorder channel enable commit 580ee84405c27d6ed419abe4d2b3de1968abdafd upstream. at_hdmac uses __raw_writel for register writes. In the absence of a barrier, the CPU may reorder the register operations. Introduce a write memory barrier so that the CPU does not reorder the channel enable, thus the start of the transfer, without making sure that all the pre-required register fields are already written. 
Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-14-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 2740c78649db..128579d5e0d8 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -237,6 +237,8 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) ATC_SPIP_BOUNDARY(first->boundary)); channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) | ATC_DPIP_BOUNDARY(first->boundary)); + /* Don't allow CPU to reorder channel enable. */ + wmb(); dma_writel(atdma, CHER, atchan->mask); vdbg_dump_regs(atchan); From 85f97c97efc501191bb73020e98d822561d61828 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:48 +0300 Subject: [PATCH 0773/1477] dmaengine: at_hdmac: Fix impossible condition commit 28cbe5a0a46a6637adbda52337d7b2777fc04027 upstream. The iterator can not be greater than ATC_MAX_DSCR_TRIALS, as the for loop will stop when i == ATC_MAX_DSCR_TRIALS. While here, use the common "i" name for the iterator. 
Fixes: 93dce3a6434f ("dmaengine: at_hdmac: fix residue computation") Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-15-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 128579d5e0d8..e3a825098140 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -299,7 +299,8 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) struct at_desc *desc_first = atc_first_active(atchan); struct at_desc *desc; int ret; - u32 ctrla, dscr, trials; + u32 ctrla, dscr; + unsigned int i; /* * If the cookie doesn't match to the currently running transfer then @@ -369,7 +370,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) dscr = channel_readl(atchan, DSCR); rmb(); /* ensure DSCR is read before CTRLA */ ctrla = channel_readl(atchan, CTRLA); - for (trials = 0; trials < ATC_MAX_DSCR_TRIALS; ++trials) { + for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) { u32 new_dscr; rmb(); /* ensure DSCR is read after CTRLA */ @@ -395,7 +396,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) rmb(); /* ensure DSCR is read before CTRLA */ ctrla = channel_readl(atchan, CTRLA); } - if (unlikely(trials >= ATC_MAX_DSCR_TRIALS)) + if (unlikely(i == ATC_MAX_DSCR_TRIALS)) return -ETIMEDOUT; /* for the first descriptor we can be more accurate */ From 367bc0fa988fde83dbb6f654067c0d76c86faf68 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:49 +0300 Subject: [PATCH 0774/1477] dmaengine: at_hdmac: Check return code of dma_async_device_register commit c47e6403fa099f200868d6b106701cb42d181d2b upstream. 
dma_async_device_register() can fail, check the return code and display an error. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-16-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index e3a825098140..6a4f9697b574 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1901,7 +1901,11 @@ static int __init at_dma_probe(struct platform_device *pdev) dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", plat_dat->nr_channels); - dma_async_device_register(&atdma->dma_common); + err = dma_async_device_register(&atdma->dma_common); + if (err) { + dev_err(&pdev->dev, "Unable to register: %d.\n", err); + goto err_dma_async_device_register; + } /* * Do not return an error if the dmac node is not present in order to @@ -1921,6 +1925,7 @@ static int __init at_dma_probe(struct platform_device *pdev) err_of_dma_controller_register: dma_async_device_unregister(&atdma->dma_common); +err_dma_async_device_register: dma_pool_destroy(atdma->memset_pool); err_memset_pool_create: dma_pool_destroy(atdma->dma_desc_pool); From 534762e261c84d43e5d56a780e40278b94c20540 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Nov 2022 18:00:11 +0000 Subject: [PATCH 0775/1477] net: tun: call napi_schedule_prep() to ensure we own a napi commit 07d120aa33cc9d9115753d159f64d20c94458781 upstream. A recent patch exposed another issue in napi_get_frags() caught by syzbot [1] Before feeding packets to GRO, and calling napi_complete() we must first grab NAPI_STATE_SCHED. 
[1] WARNING: CPU: 0 PID: 3612 at net/core/dev.c:6076 napi_complete_done+0x45b/0x880 net/core/dev.c:6076 Modules linked in: CPU: 0 PID: 3612 Comm: syz-executor408 Not tainted 6.1.0-rc3-syzkaller-00175-g1118b2049d77 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 RIP: 0010:napi_complete_done+0x45b/0x880 net/core/dev.c:6076 Code: c1 ea 03 0f b6 14 02 4c 89 f0 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 24 04 00 00 41 89 5d 1c e9 73 fc ff ff e8 b5 53 22 fa <0f> 0b e9 82 fe ff ff e8 a9 53 22 fa 48 8b 5c 24 08 31 ff 48 89 de RSP: 0018:ffffc90003c4f920 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000030 RCX: 0000000000000000 RDX: ffff8880251c0000 RSI: ffffffff875a58db RDI: 0000000000000007 RBP: 0000000000000001 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff888072d02628 R13: ffff888072d02618 R14: ffff888072d02634 R15: 0000000000000000 FS: 0000555555f13300(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055c44d3892b8 CR3: 00000000172d2000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: napi_complete include/linux/netdevice.h:510 [inline] tun_get_user+0x206d/0x3a60 drivers/net/tun.c:1980 tun_chr_write_iter+0xdb/0x200 drivers/net/tun.c:2027 call_write_iter include/linux/fs.h:2191 [inline] do_iter_readv_writev+0x20b/0x3b0 fs/read_write.c:735 do_iter_write+0x182/0x700 fs/read_write.c:861 vfs_writev+0x1aa/0x630 fs/read_write.c:934 do_writev+0x133/0x2f0 fs/read_write.c:977 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f37021a3c19 Fixes: 1118b2049d77 ("net: tun: Fix memory leaks of napi_get_frags") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Wang Yufen Link: 
https://lore.kernel.org/r/20221107180011.188437-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 83662f616b67..cb42fdbfeb32 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1986,18 +1986,25 @@ drop: skb_headlen(skb)); if (unlikely(headlen > skb_headlen(skb))) { + WARN_ON_ONCE(1); + err = -ENOMEM; this_cpu_inc(tun->pcpu_stats->rx_dropped); +napi_busy: napi_free_frags(&tfile->napi); rcu_read_unlock(); mutex_unlock(&tfile->napi_mutex); - WARN_ON(1); - return -ENOMEM; + return err; } - local_bh_disable(); - napi_gro_frags(&tfile->napi); - napi_complete(&tfile->napi); - local_bh_enable(); + if (likely(napi_schedule_prep(&tfile->napi))) { + local_bh_disable(); + napi_gro_frags(&tfile->napi); + napi_complete(&tfile->napi); + local_bh_enable(); + } else { + err = -EBUSY; + goto napi_busy; + } mutex_unlock(&tfile->napi_mutex); } else if (tfile->napi_enabled) { struct sk_buff_head *queue = &tfile->sk.sk_write_queue; From e7294b01de40eb34bed65f376986181fcdeca0cd Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 17 Nov 2020 08:37:50 -0300 Subject: [PATCH 0776/1477] mmc: sdhci-esdhc-imx: Convert the driver to DT-only commit 854a22997ad5d6c9860a2d695c40cd4004151d5b upstream. Since 5.10-rc1 i.MX is a devicetree-only platform, so simplify the code by removing the unused non-DT support. 
Signed-off-by: Fabio Estevam Reviewed-by: Haibo Chen Link: https://lore.kernel.org/r/20201117113750.25053-1-festevam@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 86 +----------------------------- 1 file changed, 1 insertion(+), 85 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index de42b5473f97..449562122adc 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -295,22 +295,6 @@ struct pltfm_imx_data { struct pm_qos_request pm_qos_req; }; -static const struct platform_device_id imx_esdhc_devtype[] = { - { - .name = "sdhci-esdhc-imx25", - .driver_data = (kernel_ulong_t) &esdhc_imx25_data, - }, { - .name = "sdhci-esdhc-imx35", - .driver_data = (kernel_ulong_t) &esdhc_imx35_data, - }, { - .name = "sdhci-esdhc-imx51", - .driver_data = (kernel_ulong_t) &esdhc_imx51_data, - }, { - /* sentinel */ - } -}; -MODULE_DEVICE_TABLE(platform, imx_esdhc_devtype); - static const struct of_device_id imx_esdhc_dt_ids[] = { { .compatible = "fsl,imx25-esdhc", .data = &esdhc_imx25_data, }, { .compatible = "fsl,imx35-esdhc", .data = &esdhc_imx35_data, }, @@ -1546,72 +1530,6 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev, } #endif -static int sdhci_esdhc_imx_probe_nondt(struct platform_device *pdev, - struct sdhci_host *host, - struct pltfm_imx_data *imx_data) -{ - struct esdhc_platform_data *boarddata = &imx_data->boarddata; - int err; - - if (!host->mmc->parent->platform_data) { - dev_err(mmc_dev(host->mmc), "no board data!\n"); - return -EINVAL; - } - - imx_data->boarddata = *((struct esdhc_platform_data *) - host->mmc->parent->platform_data); - /* write_protect */ - if (boarddata->wp_type == ESDHC_WP_GPIO) { - host->mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; - - err = mmc_gpiod_request_ro(host->mmc, "wp", 0, 0); - if (err) { - dev_err(mmc_dev(host->mmc), - "failed to request write-protect gpio!\n"); - return err; - } - } - - 
/* card_detect */ - switch (boarddata->cd_type) { - case ESDHC_CD_GPIO: - err = mmc_gpiod_request_cd(host->mmc, "cd", 0, false, 0); - if (err) { - dev_err(mmc_dev(host->mmc), - "failed to request card-detect gpio!\n"); - return err; - } - fallthrough; - - case ESDHC_CD_CONTROLLER: - /* we have a working card_detect back */ - host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; - break; - - case ESDHC_CD_PERMANENT: - host->mmc->caps |= MMC_CAP_NONREMOVABLE; - break; - - case ESDHC_CD_NONE: - break; - } - - switch (boarddata->max_bus_width) { - case 8: - host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_4_BIT_DATA; - break; - case 4: - host->mmc->caps |= MMC_CAP_4_BIT_DATA; - break; - case 1: - default: - host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA; - break; - } - - return 0; -} - static int sdhci_esdhc_imx_probe(struct platform_device *pdev) { const struct of_device_id *of_id = @@ -1631,8 +1549,7 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) imx_data = sdhci_pltfm_priv(pltfm_host); - imx_data->socdata = of_id ? of_id->data : (struct esdhc_soc_data *) - pdev->id_entry->driver_data; + imx_data->socdata = of_id->data; if (imx_data->socdata->flags & ESDHC_FLAG_PMQOS) cpu_latency_qos_add_request(&imx_data->pm_qos_req, 0); @@ -1944,7 +1861,6 @@ static struct platform_driver sdhci_esdhc_imx_driver = { .of_match_table = imx_esdhc_dt_ids, .pm = &sdhci_esdhc_pmops, }, - .id_table = imx_esdhc_devtype, .probe = sdhci_esdhc_imx_probe, .remove = sdhci_esdhc_imx_remove, }; From 154d744fbefcd13648ff036db2d185319afa74dc Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 14 Nov 2022 12:44:01 +0100 Subject: [PATCH 0777/1477] x86/cpu: Restore AMD's DE_CFG MSR after resume commit 2632daebafd04746b4b96c2f26a6021bc38f6209 upstream. DE_CFG contains the LFENCE serializing bit, restore it on resume too. This is relevant to older families due to the way how they do S3. Unify and correct naming while at it. 
Fixes: e4d0e84e4907 ("x86/cpu/AMD: Make LFENCE a serializing instruction") Reported-by: Andrew Cooper Reported-by: Pawan Gupta Signed-off-by: Borislav Petkov Cc: Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 8 +++++--- arch/x86/kernel/cpu/amd.c | 6 ++---- arch/x86/kernel/cpu/hygon.c | 4 ++-- arch/x86/kvm/svm/svm.c | 10 +++++----- arch/x86/kvm/x86.c | 2 +- arch/x86/power/cpu.c | 1 + tools/arch/x86/include/asm/msr-index.h | 8 +++++--- 7 files changed, 21 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 144dc164b759..5a8ee3b83af2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -489,6 +489,11 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -565,9 +570,6 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8b9e3277a6ce..ec3fa4dc9031 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -822,8 +822,6 @@ static void init_amd_gh(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); } -#define MSR_AMD64_DE_CFG 0xC0011029 - static void init_amd_ln(struct cpuinfo_x86 *c) { /* @@ -1018,8 +1016,8 @@ static void init_amd(struct cpuinfo_x86 *c) * msr_set_bit() uses 
the safe accessors, too, even if the MSR * is not present. */ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 774ca6bfda9f..205fa420ee7c 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -342,8 +342,8 @@ static void init_hygon(struct cpuinfo_x86 *c) * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. */ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a0512a91760d..2b7528821577 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2475,9 +2475,9 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) msr->data = 0; switch (msr->index) { - case MSR_F10H_DECFG: - if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) - msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + case MSR_AMD64_DE_CFG: + if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; break; case MSR_IA32_PERF_CAPABILITIES: return 0; @@ -2584,7 +2584,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; - case MSR_F10H_DECFG: + case MSR_AMD64_DE_CFG: msr_info->data = svm->msr_decfg; break; default: @@ -2764,7 +2764,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; - case MSR_F10H_DECFG: { + case MSR_AMD64_DE_CFG: { struct kvm_msr_entry msr_entry; msr_entry.index = msr->index; diff --git a/arch/x86/kvm/x86.c 
b/arch/x86/kvm/x86.c index 0ac80b3ff0f5..23d7c563e012 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1362,7 +1362,7 @@ static const u32 msr_based_features_all[] = { MSR_IA32_VMX_EPT_VPID_CAP, MSR_IA32_VMX_VMFUNC, - MSR_F10H_DECFG, + MSR_AMD64_DE_CFG, MSR_IA32_UCODE_REV, MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_PERF_CAPABILITIES, diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index d023c85e3c53..61581c45788e 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -522,6 +522,7 @@ static void pm_save_spec_msr(void) MSR_TSX_FORCE_ABORT, MSR_IA32_MCU_OPT_CTRL, MSR_AMD64_LS_CFG, + MSR_AMD64_DE_CFG, }; msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 144dc164b759..8fb925676813 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -489,6 +489,11 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -565,9 +570,6 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a From 0f544353fec8e717d37724d95b92538e1de79e86 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Nov 2022 09:43:39 +0100 Subject: [PATCH 0778/1477] io_uring: kill goto error handling in io_sqpoll_wait_sq() Hunk extracted from commit 70aacfe66136809d7f080f89c492c278298719f4 upstream. 
If the sqpoll thread has died, the out condition doesn't remove the waiting task from the waitqueue. The goto and check are not needed, just make it a break condition after setting the error value. That ensures that we always remove ourselves from sqo_sq_wait waitqueue. Reported-by: Xingyuan Mo Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 05f360b66b07..d1cb1addea96 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9038,7 +9038,7 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) if (unlikely(ctx->sqo_dead)) { ret = -EOWNERDEAD; - goto out; + break; } if (!io_sqring_full(ctx)) @@ -9048,7 +9048,6 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); -out: return ret; } From 41217963b1d97ec170f24fc4155953a2b0835191 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 16 Nov 2022 09:57:20 +0100 Subject: [PATCH 0779/1477] Linux 5.10.155 Link: https://lore.kernel.org/r/20221114124442.530286937@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Slade Watkins Tested-by: Florian Fainelli Tested-by: Guenter Roeck Tested-by: Salvatore Bonaccorso Tested-by: Sudip Mukherjee Tested-by: Linux Kernel Functional Testing Tested-by: Rudi Heitbaum Tested-by: Hulk Robot Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 43fecb404581..8ccf902b3609 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 154 +SUBLEVEL = 155 EXTRAVERSION = NAME = Dare mighty things From 3905cfd1d6725336b02ef0d923115ab8c543f246 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 16 Nov 2022 09:54:40 +0000 Subject: [PATCH 0780/1477] Revert "serial: 8250: Toggle 
IER bits on only after irq has been set up" This reverts commit 572fb97fce35a3cc5694b9c8a7e83c97d11461b9 which is commit 039d4926379b1d1c17b51cf21c500a5eed86899e upstream. It breaks the kernel abi and is not needed for any supported Android devices at this point in time, so it can be safely reverted. Bug: 161946584 Signed-off-by: Greg Kroah-Hartman Change-Id: I07e784e722762dab69f031063317350379e59b76 --- drivers/tty/serial/8250/8250_core.c | 16 +++++----------- drivers/tty/serial/8250/8250_port.c | 8 +++----- include/linux/serial_8250.h | 1 - 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 0a7e9491b4d1..aae9d26ce4f4 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -310,9 +310,10 @@ static void serial8250_backup_timeout(struct timer_list *t) jiffies + uart_poll_timeout(&up->port) + HZ / 5); } -static void univ8250_setup_timer(struct uart_8250_port *up) +static int univ8250_setup_irq(struct uart_8250_port *up) { struct uart_port *port = &up->port; + int retval = 0; /* * The above check will only give an accurate result the first time @@ -333,16 +334,10 @@ static void univ8250_setup_timer(struct uart_8250_port *up) */ if (!port->irq) mod_timer(&up->timer, jiffies + uart_poll_timeout(port)); -} + else + retval = serial_link_irq_chain(up); -static int univ8250_setup_irq(struct uart_8250_port *up) -{ - struct uart_port *port = &up->port; - - if (port->irq) - return serial_link_irq_chain(up); - - return 0; + return retval; } static void univ8250_release_irq(struct uart_8250_port *up) @@ -398,7 +393,6 @@ static struct uart_ops univ8250_port_ops; static const struct uart_8250_ops univ8250_driver_ops = { .setup_irq = univ8250_setup_irq, .release_irq = univ8250_release_irq, - .setup_timer = univ8250_setup_timer, }; static struct uart_8250_port serial8250_ports[UART_NR]; diff --git a/drivers/tty/serial/8250/8250_port.c 
b/drivers/tty/serial/8250/8250_port.c index 3caa4aa193f6..9d60418e4adb 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2276,10 +2276,6 @@ int serial8250_do_startup(struct uart_port *port) if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) up->port.irqflags |= IRQF_SHARED; - retval = up->ops->setup_irq(up); - if (retval) - goto out; - if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; @@ -2322,7 +2318,9 @@ int serial8250_do_startup(struct uart_port *port) } } - up->ops->setup_timer(up); + retval = up->ops->setup_irq(up); + if (retval) + goto out; /* * Now, initialize the UART diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 92f3b778d8c2..2b70f736b091 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -74,7 +74,6 @@ struct uart_8250_port; struct uart_8250_ops { int (*setup_irq)(struct uart_8250_port *); void (*release_irq)(struct uart_8250_port *); - void (*setup_timer)(struct uart_8250_port *); }; struct uart_8250_em485 { From bd66e91ad2546d8308628fcdf8d25db2817f3454 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 2 Nov 2022 03:10:11 +0100 Subject: [PATCH 0781/1477] ANDROID: fix up CRC issue with struct tcp_sock Commit 96a3ddb87031 ("tcp: fix tcp_cwnd_validate() to not forget is_cwnd_limited") renames a field in struct tcp_sock from max_packets_seq to cwnd_usage_seq. This is not a real abi break as it is not visible to external modules, but the CRC of the function tcp_sock() is changed. Fix this up with the __GENKSYMS__ hack to preserve the CRC. 
Bug: 161946584 Fixes: 96a3ddb87031 ("tcp: fix tcp_cwnd_validate() to not forget is_cwnd_limited") Signed-off-by: Greg Kroah-Hartman Change-Id: I29352ecfa5e49f97c3896782f09ffdb8da2dfaaa --- include/linux/tcp.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a8a6eb9b5dc8..e079555af7b8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -266,7 +266,14 @@ struct tcp_sock { u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ u32 max_packets_out; /* max packets_out in last window */ +/* GENKSYMS hack to preserve the ABI because of f4ce91ce12a7 ("tcp: fix + * tcp_cwnd_validate() to not forget is_cwnd_limited") + */ +#ifndef __GENKSYMS__ u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ +#else + u32 max_packets_seq; /* right edge of max_packets_out flight */ +#endif u16 urg_data; /* Saved octet of OOB data and control flags */ u8 ecn_flags; /* ECN status bits. */ From 6b31c548a114044c01436f04d2fe6da5a0bb59d8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 30 Oct 2022 12:40:56 +0100 Subject: [PATCH 0782/1477] ANDROID: fix up struct sk_buf ABI breakage In commit c378c479c517 ("io_uring/af_unix: defer registered files gc to io_uring release"), a new field is added to struct sk_buf. We need that new field to resolve real problems, so move it to the padded area that we had reserved for these types of issues. 
Update the .xml file to handle the change in fields for the padding structure, which is an abi-safe change: Leaf changes summary: 1 artifact changed (1 filtered out) Changed leaf types summary: 1 (1 filtered out) leaf types changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 0 Added function Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 0 Added variable 'struct sk_buff at skbuff.h:717:1' changed: type size hasn't changed there are data member changes: data member u64 android_kabi_reserved1 at offset 1472 (in bits) became anonymous data member 'union {struct {__u8 scm_io_uring; __u8 android_kabi_reserved1_padding1; __u16 android_kabi_reserved1_padding2; __u32 android_kabi_reserved1_padding3;}; struct {u64 android_kabi_reserved1;}; union {};}' 169 impacted interfaces Bug: 161946584 Fixes: c378c479c517 ("io_uring/af_unix: defer registered files gc to io_uring release") Signed-off-by: Greg Kroah-Hartman Change-Id: I9f99d85c95d78c688c4c9e88562863a1795e7b3a --- android/abi_gki_aarch64.xml | 1189 ++++++++++++++++++----------------- include/linux/skbuff.h | 17 +- 2 files changed, 622 insertions(+), 584 deletions(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index f32878041ead..c6377fd04f7a 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -13532,12 +13532,12 @@ - + - + - + @@ -14876,9 +14876,9 @@ - + - + @@ -18751,42 +18751,42 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -21819,12 +21819,12 @@ - + - + - + @@ -23337,7 +23337,7 @@ - + @@ -27089,12 +27089,12 @@ - + - + - + @@ -38364,6 +38364,20 @@ + + + + + + + + + + + + + + @@ -44932,6 +44946,17 @@ + + + + + + + + + + + @@ -48081,7 +48106,7 @@ - + @@ -51476,12 +51501,12 @@ - + - + - + @@ -58935,36 +58960,36 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -62572,21 +62597,21 @@ - + - + - + - + - + - + @@ -63237,12 +63262,12 @@ - + - + - + @@ -70029,12 +70054,12 @@ - + - + - + @@ -71638,15 +71663,15 @@ - + - + - + - + @@ 
-74431,12 +74456,12 @@ - + - + - + @@ -75793,18 +75818,18 @@ - + - + - + - + - + @@ -77933,234 +77958,234 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -79783,12 +79808,12 @@ - + - + - + @@ -80915,12 +80940,12 @@ - + - + - + @@ -82157,12 +82182,12 @@ - + - + - + @@ -82749,73 +82774,73 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -83029,12 +83054,12 @@ - + - + - + @@ -86496,15 +86521,15 @@ - + - + - + - + @@ -98224,15 +98249,15 @@ - + - + - + - + @@ -100586,15 +100611,15 @@ - + - + - + - + @@ -103512,199 +103537,199 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -111691,15 +111716,15 @@ - + - + - + - + @@ -113983,33 +114008,33 @@ - + - + - + - + - + - + - + - + - + - + @@ -114702,15 +114727,15 @@ - + - + - + - + @@ -116050,10 +116075,10 @@ - - - - + + + + @@ -116188,16 +116213,16 @@ - - - - + + + + - - - + + + @@ -117166,10 +117191,10 @@ - - - - + + + + @@ -123149,8 +123174,8 @@ - - + + @@ -123520,9 +123545,9 @@ - - - + + + @@ -123808,8 +123833,8 @@ - - + + @@ -123969,8 +123994,8 @@ - - + + @@ -125569,13 +125594,13 @@ - - - + + + - - + + @@ -126036,9 +126061,9 @@ - - - + + + @@ -128607,10 +128632,10 @@ - - - - + + + + @@ -130034,9 +130059,9 @@ - - - + + + @@ -130088,9 +130113,9 @@ - - - + + + @@ -130528,8 +130553,8 @@ - - + + @@ -131089,7 +131114,7 @@ - + @@ -131116,12 +131141,12 @@ - - - - - - + + + + + + @@ -132350,28 +132375,28 @@ - - - + + + - - + + - - + + - - + + - - - - - + + + + + @@ -132404,9 +132429,9 @@ - - - + + + @@ 
-132424,21 +132449,21 @@ - - - - - + + + + + - - - + + + - - - + + + @@ -132470,9 +132495,9 @@ - - - + + + @@ -134314,9 +134339,9 @@ - - - + + + @@ -135009,103 +135034,103 @@ - - + + - - - + + + - - - + + + - - + + - - + + - - - + + + - - - - - + + + + + - - - - + + + + - - - + + + - - + + - - + + - - - - + + + + - - - + + + - - + + - - - + + + - - - + + + - - + + - - - - - + + + + + - - - - + + + + - - + + @@ -135117,48 +135142,48 @@ - - - + + + - - + + - - - - - - + + + + + + - - - - + + + + - - + + - - + + - - + + - - - + + + @@ -137208,9 +137233,9 @@ - - - + + + @@ -137528,9 +137553,9 @@ - - - + + + @@ -137577,10 +137602,10 @@ - - - - + + + + @@ -139222,14 +139247,14 @@ - - - - + + + + - - + + @@ -139373,7 +139398,7 @@ - + @@ -141634,12 +141659,12 @@ - - + + - - + + @@ -141654,8 +141679,8 @@ - - + + @@ -142531,27 +142556,27 @@ - - + + - - - + + + - - - - + + + + - - + + @@ -142560,11 +142585,11 @@ - - - - - + + + + + @@ -142573,9 +142598,9 @@ - - - + + + @@ -143653,42 +143678,42 @@ - - + + - - + + - - + + - - + + - - - + + + - - + + - - + + - - + + @@ -143743,34 +143768,34 @@ - - + + - - + + - - - + + + - - - + + + - - + + - - - - - - + + + + + + @@ -144199,7 +144224,7 @@ - + @@ -144762,8 +144787,8 @@ - - + + @@ -144780,8 +144805,8 @@ - - + + @@ -144810,8 +144835,8 @@ - - + + @@ -144896,10 +144921,10 @@ - + - + @@ -146218,14 +146243,14 @@ - - - - - - - - + + + + + + + + @@ -148263,9 +148288,9 @@ - - - + + + @@ -148277,8 +148302,8 @@ - - + + @@ -148767,28 +148792,28 @@ - - - - + + + + - - - + + + - - - - + + + + - - - - - + + + + + @@ -148799,38 +148824,38 @@ - - - + + + - - + + - - - - + + + + - - - + + + - - - + + + - - - + + + - - + + @@ -148848,9 +148873,9 @@ - - - + + + @@ -148873,9 +148898,9 @@ - - - + + + @@ -148925,8 +148950,8 @@ - - + + diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9c892c347887..59a6bf66421f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -863,7 +863,6 @@ struct sk_buff { #ifdef CONFIG_TLS_DEVICE 
__u8 decrypted:1; #endif - __u8 scm_io_uring:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -914,7 +913,21 @@ struct sk_buff { __u32 headers_end[0]; /* public: */ - ANDROID_KABI_RESERVE(1); + /* Android KABI preservation. + * + * "open coded" version of ANDROID_KABI_USE() to pack more + * fields/variables into the space that we have. + * + * scm_io_uring is from 04df9719df18 ("io_uring/af_unix: defer + * registered files gc to io_uring release") + */ + _ANDROID_KABI_REPLACE(_ANDROID_KABI_RESERVE(1), + struct { + __u8 scm_io_uring:1; + __u8 android_kabi_reserved1_padding1; + __u16 android_kabi_reserved1_padding2; + __u32 android_kabi_reserved1_padding3; + }); ANDROID_KABI_RESERVE(2); /* These elements must be at the end, see alloc_skb() for details. */ From c73aa2cc41564670d13f79b6197331816aec66ff Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 10 Oct 2022 19:48:50 +0800 Subject: [PATCH 0783/1477] ASoC: wm5102: Revert "ASoC: wm5102: Fix PM disable depth imbalance in wm5102_probe" [ Upstream commit de71d7567e358effd06dfc3e2a154b25f1331c10 ] This reverts commit fcbb60820cd3008bb44334a0395e5e57ccb77329. The pm_runtime_disable is redundant when error returns in wm5102_probe, we just revert the old patch to fix it. 
Signed-off-by: Zhang Qilong Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20221010114852.88127-2-zhangqilong3@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm5102.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index b7f5e5391fdb..2ed3fa67027d 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -2083,6 +2083,9 @@ static int wm5102_probe(struct platform_device *pdev) regmap_update_bits(arizona->regmap, wm5102_digital_vu[i], WM5102_DIG_VU, WM5102_DIG_VU); + pm_runtime_enable(&pdev->dev); + pm_runtime_idle(&pdev->dev); + ret = arizona_request_irq(arizona, ARIZONA_IRQ_DSP_IRQ1, "ADSP2 Compressed IRQ", wm5102_adsp2_irq, wm5102); @@ -2115,9 +2118,6 @@ static int wm5102_probe(struct platform_device *pdev) goto err_spk_irqs; } - pm_runtime_enable(&pdev->dev); - pm_runtime_idle(&pdev->dev); - return ret; err_spk_irqs: From f8f254c8b50641b62894b511d4b9b49f3f70bb04 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 10 Oct 2022 19:48:51 +0800 Subject: [PATCH 0784/1477] ASoC: wm5110: Revert "ASoC: wm5110: Fix PM disable depth imbalance in wm5110_probe" [ Upstream commit 7d4e966f4cd73ff69bf06934e8e14a33fb7ef447 ] This reverts commit 86b46bf1feb83898d89a2b4a8d08d21e9ea277a7. The pm_runtime_disable is redundant when error returns in wm5110_probe, we just revert the old patch to fix it. 
Signed-off-by: Zhang Qilong Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20221010114852.88127-3-zhangqilong3@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm5110.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index c158f8b1e8e4..d0cef982215d 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -2452,6 +2452,9 @@ static int wm5110_probe(struct platform_device *pdev) regmap_update_bits(arizona->regmap, wm5110_digital_vu[i], WM5110_DIG_VU, WM5110_DIG_VU); + pm_runtime_enable(&pdev->dev); + pm_runtime_idle(&pdev->dev); + ret = arizona_request_irq(arizona, ARIZONA_IRQ_DSP_IRQ1, "ADSP2 Compressed IRQ", wm5110_adsp2_irq, wm5110); @@ -2484,9 +2487,6 @@ static int wm5110_probe(struct platform_device *pdev) goto err_spk_irqs; } - pm_runtime_enable(&pdev->dev); - pm_runtime_idle(&pdev->dev); - return ret; err_spk_irqs: From a47606064cc096c8e1d965f2267760168c6c97c4 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 10 Oct 2022 19:48:52 +0800 Subject: [PATCH 0785/1477] ASoC: wm8997: Revert "ASoC: wm8997: Fix PM disable depth imbalance in wm8997_probe" [ Upstream commit 68ce83e3bb26feba0fcdd59667fde942b3a600a1 ] This reverts commit 41a736ac20602f64773e80f0f5b32cde1830a44a. The pm_runtime_disable is redundant when error returns in wm8997_probe, we just revert the old patch to fix it. 
Signed-off-by: Zhang Qilong Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20221010114852.88127-4-zhangqilong3@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8997.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm8997.c b/sound/soc/codecs/wm8997.c index 07378714b013..229f2986cd96 100644 --- a/sound/soc/codecs/wm8997.c +++ b/sound/soc/codecs/wm8997.c @@ -1156,6 +1156,9 @@ static int wm8997_probe(struct platform_device *pdev) regmap_update_bits(arizona->regmap, wm8997_digital_vu[i], WM8997_DIG_VU, WM8997_DIG_VU); + pm_runtime_enable(&pdev->dev); + pm_runtime_idle(&pdev->dev); + arizona_init_common(arizona); ret = arizona_init_vol_limit(arizona); @@ -1174,9 +1177,6 @@ static int wm8997_probe(struct platform_device *pdev) goto err_spk_irqs; } - pm_runtime_enable(&pdev->dev); - pm_runtime_idle(&pdev->dev); - return ret; err_spk_irqs: From c7432616f6aac06a8055d083a779c2bedc8680e6 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 8 Oct 2022 22:05:22 +0800 Subject: [PATCH 0786/1477] ASoC: mt6660: Keep the pm_runtime enables before component stuff in mt6660_i2c_probe [ Upstream commit c4ab29b0f3a6f1e167c5a627f7cd036c1d2b7d65 ] It would be better to keep the pm_runtime enables before the IRQ and component stuff. Both of those could start triggering PM runtime events. 
Signed-off-by: Zhang Qilong Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20221008140522.134912-1-zhangqilong3@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/mt6660.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/mt6660.c b/sound/soc/codecs/mt6660.c index e18a58868273..3cee2ea4b85d 100644 --- a/sound/soc/codecs/mt6660.c +++ b/sound/soc/codecs/mt6660.c @@ -504,14 +504,14 @@ static int mt6660_i2c_probe(struct i2c_client *client, dev_err(chip->dev, "read chip revision fail\n"); goto probe_fail; } + pm_runtime_set_active(chip->dev); + pm_runtime_enable(chip->dev); ret = devm_snd_soc_register_component(chip->dev, &mt6660_component_driver, &mt6660_codec_dai, 1); - if (!ret) { - pm_runtime_set_active(chip->dev); - pm_runtime_enable(chip->dev); - } + if (ret) + pm_runtime_disable(chip->dev); return ret; From 6910e7279f5d8db0521bb8bc5ff48c56be51c8e1 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Mon, 10 Oct 2022 17:20:14 +0800 Subject: [PATCH 0787/1477] ASoC: wm8962: Add an event handler for TEMP_HP and TEMP_SPK [ Upstream commit ee1aa2ae3eaa96e70229fa61deee87ef4528ffdf ] In wm8962 driver, the WM8962_ADDITIONAL_CONTROL_4 is used as a volatile register, but this register mixes a bunch of volatile status bits and a bunch of non-volatile control bits. The dapm widgets TEMP_HP and TEMP_SPK leverage the control bits in this register. After the wm8962 probe, the regmap will be set to cache only mode, then a read error like below would be triggered when trying to read the initial power state of the dapm widgets TEMP_HP and TEMP_SPK. wm8962 0-001a: ASoC: error at soc_component_read_no_lock on wm8962.0-001a: -16 In order to fix this issue, we add an event handler to actually power up/down these widgets. With this change, we also need to explicitly power off these widgets in the wm8962 probe since they are enabled by default.
Signed-off-by: Xiaolei Wang Tested-by: Adam Ford Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20221010092014.2229246-1-xiaolei.wang@windriver.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8962.c | 54 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 38651022e3d5..21574447650c 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -1840,6 +1840,49 @@ SOC_SINGLE_TLV("SPKOUTR Mixer DACR Volume", WM8962_SPEAKER_MIXER_5, 4, 1, 0, inmix_tlv), }; +static int tp_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + int ret, reg, val, mask; + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + + ret = pm_runtime_resume_and_get(component->dev); + if (ret < 0) { + dev_err(component->dev, "Failed to resume device: %d\n", ret); + return ret; + } + + reg = WM8962_ADDITIONAL_CONTROL_4; + + if (!strcmp(w->name, "TEMP_HP")) { + mask = WM8962_TEMP_ENA_HP_MASK; + val = WM8962_TEMP_ENA_HP; + } else if (!strcmp(w->name, "TEMP_SPK")) { + mask = WM8962_TEMP_ENA_SPK_MASK; + val = WM8962_TEMP_ENA_SPK; + } else { + pm_runtime_put(component->dev); + return -EINVAL; + } + + switch (event) { + case SND_SOC_DAPM_POST_PMD: + val = 0; + fallthrough; + case SND_SOC_DAPM_POST_PMU: + ret = snd_soc_component_update_bits(component, reg, mask, val); + break; + default: + WARN(1, "Invalid event %d\n", event); + pm_runtime_put(component->dev); + return -EINVAL; + } + + pm_runtime_put(component->dev); + + return 0; +} + static int cp_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { @@ -2133,8 +2176,10 @@ SND_SOC_DAPM_SUPPLY("TOCLK", WM8962_ADDITIONAL_CONTROL_1, 0, 0, NULL, 0), SND_SOC_DAPM_SUPPLY_S("DSP2", 1, WM8962_DSP2_POWER_MANAGEMENT, WM8962_DSP2_ENA_SHIFT, 0, dsp2_event, SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), 
-SND_SOC_DAPM_SUPPLY("TEMP_HP", WM8962_ADDITIONAL_CONTROL_4, 2, 0, NULL, 0), -SND_SOC_DAPM_SUPPLY("TEMP_SPK", WM8962_ADDITIONAL_CONTROL_4, 1, 0, NULL, 0), +SND_SOC_DAPM_SUPPLY("TEMP_HP", SND_SOC_NOPM, 0, 0, tp_event, + SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD), +SND_SOC_DAPM_SUPPLY("TEMP_SPK", SND_SOC_NOPM, 0, 0, tp_event, + SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_MIXER("INPGAL", WM8962_LEFT_INPUT_PGA_CONTROL, 4, 0, inpgal, ARRAY_SIZE(inpgal)), @@ -3760,6 +3805,11 @@ static int wm8962_i2c_probe(struct i2c_client *i2c, if (ret < 0) goto err_pm_runtime; + regmap_update_bits(wm8962->regmap, WM8962_ADDITIONAL_CONTROL_4, + WM8962_TEMP_ENA_HP_MASK, 0); + regmap_update_bits(wm8962->regmap, WM8962_ADDITIONAL_CONTROL_4, + WM8962_TEMP_ENA_SPK_MASK, 0); + regcache_cache_only(wm8962->regmap, true); /* The drivers should power up as needed */ From 0b4d650f905cf332ef74ac95a4cd2edc4817913b Mon Sep 17 00:00:00 2001 From: Mauro Lima Date: Wed, 12 Oct 2022 12:21:35 -0300 Subject: [PATCH 0788/1477] spi: intel: Fix the offset to get the 64K erase opcode [ Upstream commit 6a43cd02ddbc597dc9a1f82c1e433f871a2f6f06 ] According to documentation, the 64K erase opcode is located in VSCC range [16:23] instead of [8:15]. Use the proper value to shift the mask over the correct range. 
Signed-off-by: Mauro Lima Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20221012152135.28353-1-mauro.lima@eclypsium.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/mtd/spi-nor/controllers/intel-spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.c b/drivers/mtd/spi-nor/controllers/intel-spi.c index b54a56a68100..b4b0affd16c8 100644 --- a/drivers/mtd/spi-nor/controllers/intel-spi.c +++ b/drivers/mtd/spi-nor/controllers/intel-spi.c @@ -117,7 +117,7 @@ #define ERASE_OPCODE_SHIFT 8 #define ERASE_OPCODE_MASK (0xff << ERASE_OPCODE_SHIFT) #define ERASE_64K_OPCODE_SHIFT 16 -#define ERASE_64K_OPCODE_MASK (0xff << ERASE_OPCODE_SHIFT) +#define ERASE_64K_OPCODE_MASK (0xff << ERASE_64K_OPCODE_SHIFT) #define INTEL_SPI_TIMEOUT 5000 /* ms */ #define INTEL_SPI_FIFO_SZ 64 From 9aae00961ab3a49ca453b918bd770f878e5318dd Mon Sep 17 00:00:00 2001 From: Siarhei Volkau Date: Sun, 16 Oct 2022 16:26:42 +0300 Subject: [PATCH 0789/1477] ASoC: codecs: jz4725b: add missed Line In power control bit [ Upstream commit 1013999b431b4bcdc1f5ae47dd3338122751db31 ] Line In path stayed powered off during capturing or bypass to mixer. 
Signed-off-by: Siarhei Volkau Link: https://lore.kernel.org/r/20221016132648.3011729-2-lis8215@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/jz4725b.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/jz4725b.c b/sound/soc/codecs/jz4725b.c index e49374c72e70..9f6f4e941e55 100644 --- a/sound/soc/codecs/jz4725b.c +++ b/sound/soc/codecs/jz4725b.c @@ -236,7 +236,8 @@ static const struct snd_soc_dapm_widget jz4725b_codec_dapm_widgets[] = { SND_SOC_DAPM_MIXER("DAC to Mixer", JZ4725B_CODEC_REG_CR1, REG_CR1_DACSEL_OFFSET, 0, NULL, 0), - SND_SOC_DAPM_MIXER("Line In", SND_SOC_NOPM, 0, 0, NULL, 0), + SND_SOC_DAPM_MIXER("Line In", JZ4725B_CODEC_REG_PMR1, + REG_PMR1_SB_LIN_OFFSET, 1, NULL, 0), SND_SOC_DAPM_MIXER("HP Out", JZ4725B_CODEC_REG_CR1, REG_CR1_HP_DIS_OFFSET, 1, NULL, 0), From c87945c173853357288485208af5963ce83f01a2 Mon Sep 17 00:00:00 2001 From: Siarhei Volkau Date: Sun, 16 Oct 2022 16:26:43 +0300 Subject: [PATCH 0790/1477] ASoC: codecs: jz4725b: fix reported volume for Master ctl [ Upstream commit 088777bf65b98cfa4b5378119d0a7d49a58ece44 ] DAC volume control is the Master Playback Volume at the moment and it reports wrong levels in alsamixer and other alsa apps. The patch fixes that, as stated in manual on the jz4725b SoC (16.6.3.4 Programmable attenuation: GOD) the ctl range varies from -22.5dB to 0dB with 1.5dB step. 
Signed-off-by: Siarhei Volkau Link: https://lore.kernel.org/r/20221016132648.3011729-3-lis8215@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/jz4725b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/jz4725b.c b/sound/soc/codecs/jz4725b.c index 9f6f4e941e55..6f3d4ead9150 100644 --- a/sound/soc/codecs/jz4725b.c +++ b/sound/soc/codecs/jz4725b.c @@ -142,8 +142,8 @@ struct jz_icdc { struct clk *clk; }; -static const SNDRV_CTL_TLVD_DECLARE_DB_LINEAR(jz4725b_dac_tlv, -2250, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_LINEAR(jz4725b_line_tlv, -1500, 600); +static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(jz4725b_dac_tlv, -2250, 150, 0); static const struct snd_kcontrol_new jz4725b_codec_controls[] = { SOC_DOUBLE_TLV("Master Playback Volume", From aeb7e8bc0d3eb5fd416d28d42c38436ce469348d Mon Sep 17 00:00:00 2001 From: Siarhei Volkau Date: Sun, 16 Oct 2022 16:26:44 +0300 Subject: [PATCH 0791/1477] ASoC: codecs: jz4725b: use right control for Capture Volume [ Upstream commit 1538e2c8c9b7e7a656effcc6e4e7cfe8c1b405fd ] Line In Bypass control is used as Master Capture at the moment, which is completely incorrect. The current control is routed to the Mixer instead of the ADC, and thus can't affect the Capture path. ADC control shall be used instead. ADC volume control parameters are different, so the patch fixes that as well. Manual says (16.6.3.2 Programmable input attenuation amplifier: PGATM) that gain varies in range 0dB..22.5dB with 1.5dB step.
Signed-off-by: Siarhei Volkau Link: https://lore.kernel.org/r/20221016132648.3011729-4-lis8215@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/jz4725b.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/jz4725b.c b/sound/soc/codecs/jz4725b.c index 6f3d4ead9150..9dc8d76bf104 100644 --- a/sound/soc/codecs/jz4725b.c +++ b/sound/soc/codecs/jz4725b.c @@ -136,13 +136,16 @@ enum { #define REG_CGR3_GO1L_OFFSET 0 #define REG_CGR3_GO1L_MASK (0x1f << REG_CGR3_GO1L_OFFSET) +#define REG_CGR10_GIL_OFFSET 0 +#define REG_CGR10_GIR_OFFSET 4 + struct jz_icdc { struct regmap *regmap; void __iomem *base; struct clk *clk; }; -static const SNDRV_CTL_TLVD_DECLARE_DB_LINEAR(jz4725b_line_tlv, -1500, 600); +static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(jz4725b_adc_tlv, 0, 150, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(jz4725b_dac_tlv, -2250, 150, 0); static const struct snd_kcontrol_new jz4725b_codec_controls[] = { @@ -151,11 +154,11 @@ static const struct snd_kcontrol_new jz4725b_codec_controls[] = { REG_CGR1_GODL_OFFSET, REG_CGR1_GODR_OFFSET, 0xf, 1, jz4725b_dac_tlv), - SOC_DOUBLE_R_TLV("Master Capture Volume", - JZ4725B_CODEC_REG_CGR3, - JZ4725B_CODEC_REG_CGR2, - REG_CGR2_GO1R_OFFSET, - 0x1f, 1, jz4725b_line_tlv), + SOC_DOUBLE_TLV("Master Capture Volume", + JZ4725B_CODEC_REG_CGR10, + REG_CGR10_GIL_OFFSET, + REG_CGR10_GIR_OFFSET, + 0xf, 0, jz4725b_adc_tlv), SOC_SINGLE("Master Playback Switch", JZ4725B_CODEC_REG_CR1, REG_CR1_DAC_MUTE_OFFSET, 1, 1), From c1f0defecbdcbce8dcd8b6e419e675f6bb2a3c0a Mon Sep 17 00:00:00 2001 From: Siarhei Volkau Date: Sun, 16 Oct 2022 16:26:45 +0300 Subject: [PATCH 0792/1477] ASoC: codecs: jz4725b: fix capture selector naming [ Upstream commit 80852f8268769715db335a22305e81a0c4a38a84 ] At the moment Capture source selector appears on Playback tab in the alsamixer and has a senseless name. Let's fix that. 
Signed-off-by: Siarhei Volkau Link: https://lore.kernel.org/r/20221016132648.3011729-5-lis8215@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/jz4725b.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/jz4725b.c b/sound/soc/codecs/jz4725b.c index 9dc8d76bf104..ab744e375367 100644 --- a/sound/soc/codecs/jz4725b.c +++ b/sound/soc/codecs/jz4725b.c @@ -183,7 +183,7 @@ static SOC_VALUE_ENUM_SINGLE_DECL(jz4725b_codec_adc_src_enum, jz4725b_codec_adc_src_texts, jz4725b_codec_adc_src_values); static const struct snd_kcontrol_new jz4725b_codec_adc_src_ctrl = - SOC_DAPM_ENUM("Route", jz4725b_codec_adc_src_enum); + SOC_DAPM_ENUM("ADC Source Capture Route", jz4725b_codec_adc_src_enum); static const struct snd_kcontrol_new jz4725b_codec_mixer_controls[] = { SOC_DAPM_SINGLE("Line In Bypass", JZ4725B_CODEC_REG_CR1, @@ -228,7 +228,7 @@ static const struct snd_soc_dapm_widget jz4725b_codec_dapm_widgets[] = { SND_SOC_DAPM_ADC("ADC", "Capture", JZ4725B_CODEC_REG_PMR1, REG_PMR1_SB_ADC_OFFSET, 1), - SND_SOC_DAPM_MUX("ADC Source", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_MUX("ADC Source Capture Route", SND_SOC_NOPM, 0, 0, &jz4725b_codec_adc_src_ctrl), /* Mixer */ @@ -287,11 +287,11 @@ static const struct snd_soc_dapm_route jz4725b_codec_dapm_routes[] = { {"Mixer", NULL, "DAC to Mixer"}, {"Mixer to ADC", NULL, "Mixer"}, - {"ADC Source", "Mixer", "Mixer to ADC"}, - {"ADC Source", "Line In", "Line In"}, - {"ADC Source", "Mic 1", "Mic 1"}, - {"ADC Source", "Mic 2", "Mic 2"}, - {"ADC", NULL, "ADC Source"}, + {"ADC Source Capture Route", "Mixer", "Mixer to ADC"}, + {"ADC Sourc Capture Routee", "Line In", "Line In"}, + {"ADC Source Capture Route", "Mic 1", "Mic 1"}, + {"ADC Source Capture Route", "Mic 2", "Mic 2"}, + {"ADC", NULL, "ADC Source Capture Route"}, {"Out Stage", NULL, "Mixer"}, {"HP Out", NULL, "Out Stage"}, From fdf6807606293aa148254510fa14d8f88e2a8984 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Mon, 10 Oct 2022 08:37:02 +0200 Subject: [PATCH 0793/1477] selftests/futex: fix build for clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 03cab65a07e083b6c1010fbc8f9b817e9aca75d9 ] Don't use the test-specific header files as source files to force a target dependency, as clang will complain if more than one source file is used for a compile command with a single '-o' flag. Use the proper Makefile variables instead as defined in tools/testing/selftests/lib.mk. Signed-off-by: Ricardo Cañuelo Reviewed-by: André Almeida Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/futex/functional/Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 23207829ec75..6a0ed2e7881e 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -3,11 +3,11 @@ INCLUDES := -I../include -I../../ CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) LDLIBS := -lpthread -lrt -HEADERS := \ +LOCAL_HDRS := \ ../include/futextest.h \ ../include/atomic.h \ ../include/logging.h -TEST_GEN_FILES := \ +TEST_GEN_PROGS := \ futex_wait_timeout \ futex_wait_wouldblock \ futex_requeue_pi \ @@ -21,5 +21,3 @@ TEST_PROGS := run.sh top_srcdir = ../../../../.. 
KSFT_KHDR_INSTALL := 1 include ../../lib.mk - -$(TEST_GEN_FILES): $(HEADERS) From b1619f0307762526949f95c878119ca80e32149d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Mon, 10 Oct 2022 08:38:11 +0200 Subject: [PATCH 0794/1477] selftests/intel_pstate: fix build for ARCH=x86_64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit beb7d862ed4ac6aa14625418970f22a7d55b8615 ] Handle the scenario where the build is launched with the ARCH envvar defined as x86_64. Signed-off-by: Ricardo Cañuelo Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/intel_pstate/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile index 39f0fa2a8fd6..05d66ef50c97 100644 --- a/tools/testing/selftests/intel_pstate/Makefile +++ b/tools/testing/selftests/intel_pstate/Makefile @@ -2,10 +2,10 @@ CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE LDLIBS += -lm -uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) +ARCH ?= $(shell uname -m 2>/dev/null || echo not) +ARCH_PROCESSED := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -ifeq (x86,$(ARCH)) +ifeq (x86,$(ARCH_PROCESSED)) TEST_GEN_FILES := msr aperf endif From bb3edbd09287e515fe289fbedbffed5661a0ebda Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Wed, 19 Oct 2022 17:57:15 +0800 Subject: [PATCH 0795/1477] ASoC: rt1308-sdw: add the default value of some registers [ Upstream commit 75d8b1662ca5c20cf8365575222abaef18ff1f50 ] The driver missed the default value of register 0xc070/0xc360. This patch adds that default value to avoid invalid register access when the device has not been enumerated yet.
BugLink: https://github.com/thesofproject/linux/issues/3924 Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20221019095715.31082-1-shumingf@realtek.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt1308-sdw.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt1308-sdw.h b/sound/soc/codecs/rt1308-sdw.h index c5ce75666dcc..98293d73ebab 100644 --- a/sound/soc/codecs/rt1308-sdw.h +++ b/sound/soc/codecs/rt1308-sdw.h @@ -139,9 +139,11 @@ static const struct reg_default rt1308_reg_defaults[] = { { 0x3005, 0x23 }, { 0x3008, 0x02 }, { 0x300a, 0x00 }, + { 0xc000 | (RT1308_DATA_PATH << 4), 0x00 }, { 0xc003 | (RT1308_DAC_SET << 4), 0x00 }, { 0xc001 | (RT1308_POWER << 4), 0x00 }, { 0xc002 | (RT1308_POWER << 4), 0x00 }, + { 0xc000 | (RT1308_POWER_STATUS << 4), 0x00 }, }; #define RT1308_SDW_OFFSET 0xc000 From f0187227e2b8a8377d6d3847b656099ed81e9f1a Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 6 Oct 2022 17:26:48 -0400 Subject: [PATCH 0796/1477] drm/amd/display: Remove wrong pipe control lock [ Upstream commit ca08a1725d0d78efca8d2dbdbce5ea70355da0f2 ] When using a device based on DCN32/321, we have an issue where a second 4k@60Hz display does not light up, and the system becomes unresponsive for a few minutes. In the debug process, it was possible to see a hang in the function dcn20_post_unlock_program_front_end in this part: for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS*1000 && hubp->funcs->hubp_is_flip_pending(hubp); j++) mdelay(1); } The hubp_is_flip_pending always returns positive for waiting pending flips which is a symptom of pipe hang. Additionally, the dmesg log shows this message after a few minutes: BUG: soft lockup - CPU#4 stuck for 26s! ... [ +0.000003] dcn20_post_unlock_program_front_end+0x112/0x340 [amdgpu] [ +0.000171] dc_commit_state_no_check+0x63d/0xbf0 [amdgpu] [ +0.000155] ? 
dc_validate_global_state+0x358/0x3d0 [amdgpu] [ +0.000154] dc_commit_state+0xe2/0xf0 [amdgpu] This confirmed the hypothesis that we had a pipe hanging somewhere. Next, after checking the ftrace entries, we have the below weird sequence: [..] 2) | dcn10_lock_all_pipes [amdgpu]() { 2) 0.120 us | optc1_is_tg_enabled [amdgpu](); 2) | dcn20_pipe_control_lock [amdgpu]() { 2) | dc_dmub_srv_clear_inbox0_ack [amdgpu]() { 2) 0.121 us | amdgpu_dm_dmub_reg_write [amdgpu](); 2) 0.551 us | } 2) | dc_dmub_srv_send_inbox0_cmd [amdgpu]() { 2) 0.110 us | amdgpu_dm_dmub_reg_write [amdgpu](); 2) 0.511 us | } 2) | dc_dmub_srv_wait_for_inbox0_ack [amdgpu]() { 2) 0.110 us | amdgpu_dm_dmub_reg_read [amdgpu](); 2) 0.110 us | amdgpu_dm_dmub_reg_read [amdgpu](); 2) 0.110 us | amdgpu_dm_dmub_reg_read [amdgpu](); 2) 0.110 us | amdgpu_dm_dmub_reg_read [amdgpu](); 2) 0.110 us | amdgpu_dm_dmub_reg_read [amdgpu](); 2) 0.110 us | amdgpu_dm_dmub_reg_read [amdgpu](); 2) 0.110 us | amdgpu_dm_dmub_reg_read [amdgpu](); [..] We are not expected to read from dmub register so many times and for so long. From the trace log, it was possible to identify that the function dcn20_pipe_control_lock was triggering the dmub operation when it was unnecessary and causing the hang issue. This commit drops the unnecessary dmub code and, consequently, fixes the issue of the second display not lighting up.
Tested-by: Daniel Wheeler Acked-by: Qingqing Zhuo Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 8f66eef0c683..c6c4888c6665 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1746,7 +1746,7 @@ void dcn20_post_unlock_program_front_end( for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS*1000 && hubp->funcs->hubp_is_flip_pending(hubp); j++) - mdelay(1); + udelay(1); } } From 6cb657722e37561202194bb52ba387a82572ac3f Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 19 Oct 2022 12:09:18 -0400 Subject: [PATCH 0797/1477] NFSv4: Retry LOCK on OLD_STATEID during delegation return [ Upstream commit f5ea16137a3fa2858620dc9084466491c128535f ] There's a small window where a LOCK sent during a delegation return can race with another OPEN on client, but the open stateid has not yet been updated. In this case, the client doesn't handle the OLD_STATEID error from the server and will lose this lock, emitting: "NFS: nfs4_handle_delegation_recall_error: unhandled error -10024". Fix this by sending the task through the nfs4 error handling in nfs4_lock_done() when we may have to reconcile our stateid with what the server believes it to be. For this case, the result is a retry of the LOCK operation with the updated stateid. 
Reported-by: Gonzalo Siero Humet Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/nfs4proc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 03f09399abf4..36af3734ac87 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7014,6 +7014,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) { struct nfs4_lockdata *data = calldata; struct nfs4_lock_state *lsp = data->lsp; + struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry)); dprintk("%s: begin!\n", __func__); @@ -7023,8 +7024,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; switch (task->tk_status) { case 0: - renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), - data->timestamp); + renew_lease(server, data->timestamp); if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) @@ -7045,6 +7045,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if (!nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) goto out_restart; + else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN) + goto out_restart; } else if (!nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) goto out_restart; From 959cb0fd6951406da4d25fd756a2c061bf89e88d Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 20 Oct 2022 16:39:33 +0200 Subject: [PATCH 0798/1477] i2c: tegra: Allocate DMA memory for DMA engine [ Upstream commit cdbf26251d3b35c4ccaea0c3a6de4318f727d3d2 ] When the I2C controllers are running in DMA mode, it is the DMA engine that performs the memory accesses rather than the I2C controller. Pass the DMA engine's struct device pointer to the DMA API to make sure the correct DMA operations are used. 
This fixes an issue where the DMA engine's SMMU stream ID needs to be misleadingly set for the I2C controllers in device tree. Suggested-by: Robin Murphy Signed-off-by: Thierry Reding Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-tegra.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 8b113ae32dc7..42f1db60ad6f 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -283,6 +283,7 @@ struct tegra_i2c_dev { struct dma_chan *tx_dma_chan; struct dma_chan *rx_dma_chan; unsigned int dma_buf_size; + struct device *dma_dev; dma_addr_t dma_phys; void *dma_buf; @@ -419,7 +420,7 @@ static int tegra_i2c_dma_submit(struct tegra_i2c_dev *i2c_dev, size_t len) static void tegra_i2c_release_dma(struct tegra_i2c_dev *i2c_dev) { if (i2c_dev->dma_buf) { - dma_free_coherent(i2c_dev->dev, i2c_dev->dma_buf_size, + dma_free_coherent(i2c_dev->dma_dev, i2c_dev->dma_buf_size, i2c_dev->dma_buf, i2c_dev->dma_phys); i2c_dev->dma_buf = NULL; } @@ -466,10 +467,13 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) i2c_dev->tx_dma_chan = chan; + WARN_ON(i2c_dev->tx_dma_chan->device != i2c_dev->rx_dma_chan->device); + i2c_dev->dma_dev = chan->device->dev; + i2c_dev->dma_buf_size = i2c_dev->hw->quirks->max_write_len + I2C_PACKET_HEADER_SIZE; - dma_buf = dma_alloc_coherent(i2c_dev->dev, i2c_dev->dma_buf_size, + dma_buf = dma_alloc_coherent(i2c_dev->dma_dev, i2c_dev->dma_buf_size, &dma_phys, GFP_KERNEL | __GFP_NOWARN); if (!dma_buf) { dev_err(i2c_dev->dev, "failed to allocate DMA buffer\n"); @@ -1255,7 +1259,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->dma_mode) { if (i2c_dev->msg_read) { - dma_sync_single_for_device(i2c_dev->dev, + dma_sync_single_for_device(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_FROM_DEVICE); @@ -1263,7 +1267,7 @@ static int tegra_i2c_xfer_msg(struct 
tegra_i2c_dev *i2c_dev, if (err) return err; } else { - dma_sync_single_for_cpu(i2c_dev->dev, + dma_sync_single_for_cpu(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_TO_DEVICE); } @@ -1276,7 +1280,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE, msg->buf, msg->len); - dma_sync_single_for_device(i2c_dev->dev, + dma_sync_single_for_device(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_TO_DEVICE); @@ -1327,7 +1331,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, } if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) { - dma_sync_single_for_cpu(i2c_dev->dev, + dma_sync_single_for_cpu(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_FROM_DEVICE); From df2747f295ac265dfa62889bdd07f5b7932b2d18 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 6 Oct 2022 16:54:40 +0200 Subject: [PATCH 0799/1477] i2c: i801: add lis3lv02d's I2C address for Vostro 5568 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d6643d7207c572c1b0305ed505101f15502c6c87 ] Dell Vostro 5568 laptop has lis3lv02d, but its i2c address is not known to the kernel. Add this address. 
Output of "cat /sys/devices/platform/lis3lv02d/position" on Dell Vostro 5568 laptop: - Horizontal: (-18,0,1044) - Front elevated: (522,-18,1080) - Left elevated: (-18,-360,1080) - Upside down: (36,108,-1134) Signed-off-by: Nam Cao Reviewed-by: Jean Delvare Reviewed-by: Pali Rohár Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-i801.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 5618c1ff34dc..45682d30d705 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1275,6 +1275,7 @@ static const struct { */ { "Latitude 5480", 0x29 }, { "Vostro V131", 0x1d }, + { "Vostro 5568", 0x29 }, }; static void register_dell_lis3lv02d_i2c_device(struct i801_priv *priv) From 16743c4bf3ef433cdd9babcd4d627b3387ad947d Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Tue, 13 Sep 2022 13:55:44 -0700 Subject: [PATCH 0800/1477] drm/imx: imx-tve: Fix return type of imx_tve_connector_mode_valid [ Upstream commit fc007fb815ab5395c3962c09b79a1630b0fbed9c ] The mode_valid field in drm_connector_helper_funcs is expected to be of type: enum drm_mode_status (* mode_valid) (struct drm_connector *connector, struct drm_display_mode *mode); The mismatched return type breaks forward edge kCFI since the underlying function definition does not match the function hook definition. The return type of imx_tve_connector_mode_valid should be changed from int to enum drm_mode_status. 
Reported-by: Dan Carpenter Link: https://github.com/ClangBuiltLinux/linux/issues/1703 Cc: llvm@lists.linux.dev Signed-off-by: Nathan Huckleberry Reviewed-by: Nathan Chancellor Reviewed-by: Fabio Estevam Reviewed-by: Philipp Zabel Signed-off-by: Philipp Zabel Link: https://patchwork.freedesktop.org/patch/msgid/20220913205544.155106-1-nhuck@google.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/imx/imx-tve.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c index 2a8d2e32e7b4..9fe6a4733106 100644 --- a/drivers/gpu/drm/imx/imx-tve.c +++ b/drivers/gpu/drm/imx/imx-tve.c @@ -212,8 +212,9 @@ static int imx_tve_connector_get_modes(struct drm_connector *connector) return ret; } -static int imx_tve_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +imx_tve_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { struct imx_tve *tve = con_to_tve(connector); unsigned long rate; From ce75e9085988e51dbf2f57fd7c49aa4457fa892e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:40 +0000 Subject: [PATCH 0801/1477] btrfs: remove pointless and double ulist frees in error paths of qgroup tests [ Upstream commit d0ea17aec12ea0f7b9d2ed727d8ef8169d1e7699 ] Several places in the qgroup self tests follow the pattern of freeing the ulist pointer they passed to btrfs_find_all_roots() if the call to that function returned an error. That is pointless because that function always frees the ulist in case it returns an error. 
Also, in some places such as test_multiple_refs(), after a call to btrfs_qgroup_account_extent() we also leave "old_roots" and "new_roots" pointing to ulists that were freed, because btrfs_qgroup_account_extent() has freed those ulists, and if after that the next call to btrfs_find_all_roots() fails, we call ulist_free() on the "old_roots" ulist again, resulting in a double free. So remove those calls to reduce the code size and avoid double ulist free in case of an error. Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/tests/qgroup-tests.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index c4b31dccc184..289366c98f5b 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -230,7 +230,6 @@ static int test_no_shared_qgroup(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -246,7 +245,6 @@ static int test_no_shared_qgroup(struct btrfs_root *root, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -258,18 +256,19 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; } + /* btrfs_qgroup_account_extent() always frees the ulists passed to it.
*/ + old_roots = NULL; + new_roots = NULL; + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, nodesize, nodesize)) { test_err("qgroup counts didn't match expected values"); return -EINVAL; } - old_roots = NULL; - new_roots = NULL; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -284,7 +283,6 @@ static int test_no_shared_qgroup(struct btrfs_root *root, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -335,7 +333,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -351,7 +348,6 @@ static int test_multiple_refs(struct btrfs_root *root, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -372,7 +368,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -388,7 +383,6 @@ static int test_multiple_refs(struct btrfs_root *root, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -415,7 +409,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -431,7 +424,6 @@ static int test_multiple_refs(struct btrfs_root *root, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } From 
bd487932408d462ed86b10833da35c61f618f62f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 31 Oct 2022 16:10:33 -0700 Subject: [PATCH 0802/1477] Bluetooth: L2CAP: Fix l2cap_global_chan_by_psm [ Upstream commit f937b758a188d6fd328a81367087eddbb2fce50f ] l2cap_global_chan_by_psm shall not return fixed channels as they are not meant to be connected by (S)PSM. Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Tedd Ho-Jeong An Signed-off-by: Sasha Levin --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index e69e96ef4927..c5e4d2b8cb0b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1986,7 +1986,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) continue; - if (c->psm == psm) { + if (c->chan_type != L2CAP_CHAN_FIXED && c->psm == psm) { int src_match, dst_match; int src_any, dst_any; From 24839d027c8352104c92f1fe5d2b8e59fca0c442 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 19 Oct 2022 08:16:39 +0100 Subject: [PATCH 0803/1477] ASoC: codecs: jz4725b: Fix spelling mistake "Sourc" -> "Source", "Routee" -> "Route" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit df496157a5afa1b6d1f4c46ad6549c2c346d1e59 ] There are two spelling mistakes in codec routing description. Fix it. 
Signed-off-by: Colin Ian King Reviewed-by: Philippe Mathieu-Daudé Acked-by: Paul Cercueil Link: https://lore.kernel.org/r/20221019071639.1003730-1-colin.i.king@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/jz4725b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/jz4725b.c b/sound/soc/codecs/jz4725b.c index ab744e375367..8a830d0ad950 100644 --- a/sound/soc/codecs/jz4725b.c +++ b/sound/soc/codecs/jz4725b.c @@ -288,7 +288,7 @@ static const struct snd_soc_dapm_route jz4725b_codec_dapm_routes[] = { {"Mixer to ADC", NULL, "Mixer"}, {"ADC Source Capture Route", "Mixer", "Mixer to ADC"}, - {"ADC Sourc Capture Routee", "Line In", "Line In"}, + {"ADC Source Capture Route", "Line In", "Line In"}, {"ADC Source Capture Route", "Mic 1", "Mic 1"}, {"ADC Source Capture Route", "Mic 2", "Mic 2"}, {"ADC", NULL, "ADC Source Capture Route"}, From a326fffdc78bc535152ea400beb811977a1da8d3 Mon Sep 17 00:00:00 2001 From: Alexander Sergeyev Date: Fri, 14 Jan 2022 19:50:50 +0300 Subject: [PATCH 0804/1477] ALSA: hda/realtek: fix speakers and micmute on HP 855 G8 [ Upstream commit 91502a9a0b0d5252cf3f32ebd898823c2f5aadab ] There are several PCI ids associated with HP EliteBook 855 G8 Notebook PC. Commit 0e68c4b11f1e6 ("ALSA: hda/realtek: fix mute/micmute LEDs for HP 855 G8") covers 0x103c:0x8896, while this commit covers 0x103c:0x8895 which needs some additional work on top of the quirk from 0e68c4b11f1e6. Note that the device can boot up with working speakers and micmute LED without this patch, but the success rate would be quite low (order of 16 working boots across 709 boots) at least for the built-in drivers scenario. This also means that there are some timing issues during early boot and this patch is a workaround. With this patch applied speakers and headphones are consistently working, as well as mute/micmute LEDs and the internal microphone.
Signed-off-by: Alexander Sergeyev Link: https://lore.kernel.org/r/20220114165050.ouw2nknuspclynro@localhost.localdomain Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e3f6b930ad4a..d427e8939a0e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6883,6 +6883,7 @@ enum { ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME, ALC285_FIXUP_LEGION_Y9000X_SPEAKERS, ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE, + ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -8693,6 +8694,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, + [ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x19 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x8e11 }, + { } + }, + .chained = true, + .chain_id = ALC285_FIXUP_HP_MUTE_LED, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8915,6 +8926,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED), SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), From 23793518a7523887266769d9345c683e08c590e8 Mon Sep 17 00:00:00 2001 
From: Mika Westerberg Date: Wed, 9 Feb 2022 15:27:04 +0300 Subject: [PATCH 0805/1477] mtd: spi-nor: intel-spi: Disable write protection only if asked [ Upstream commit cd149eff8d2201a63c074a6d9d03e52926aa535d ] Currently the driver tries to disable the BIOS write protection automatically even if this is not what the user wants. For this reason modify the driver so that by default it does not touch the write protection. Only if specifically asked by the user (setting writeable=1 command line parameter) the driver tries to disable the BIOS write protection. Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Reviewed-by: Mauro Lima Reviewed-by: Tudor Ambarus Acked-by: Lee Jones Link: https://lore.kernel.org/r/20220209122706.42439-2-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown Stable-dep-of: 92a66cbf6b30 ("spi: intel: Use correct mask for flash and protected regions") Signed-off-by: Sasha Levin --- drivers/mfd/lpc_ich.c | 59 +++++++++++++++++-- .../mtd/spi-nor/controllers/intel-spi-pci.c | 29 +++++---- drivers/mtd/spi-nor/controllers/intel-spi.c | 41 ++++++------- include/linux/platform_data/intel-spi.h | 6 +- 4 files changed, 96 insertions(+), 39 deletions(-) diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c index 3bbb29a7e7a5..2411b7a2e6f4 100644 --- a/drivers/mfd/lpc_ich.c +++ b/drivers/mfd/lpc_ich.c @@ -63,6 +63,8 @@ #define SPIBASE_BYT 0x54 #define SPIBASE_BYT_SZ 512 #define SPIBASE_BYT_EN BIT(1) +#define BYT_BCR 0xfc +#define BYT_BCR_WPD BIT(0) #define SPIBASE_LPT 0x3800 #define SPIBASE_LPT_SZ 512 @@ -1083,12 +1085,57 @@ wdt_done: return ret; } +static bool lpc_ich_byt_set_writeable(void __iomem *base, void *data) +{ + u32 val; + + val = readl(base + BYT_BCR); + if (!(val & BYT_BCR_WPD)) { + val |= BYT_BCR_WPD; + writel(val, base + BYT_BCR); + val = readl(base + BYT_BCR); + } + + return val & BYT_BCR_WPD; +} + +static bool lpc_ich_lpt_set_writeable(void __iomem *base, void *data) +{ + struct pci_dev *pdev = data; + u32 bcr; + + 
pci_read_config_dword(pdev, BCR, &bcr); + if (!(bcr & BCR_WPD)) { + bcr |= BCR_WPD; + pci_write_config_dword(pdev, BCR, bcr); + pci_read_config_dword(pdev, BCR, &bcr); + } + + return bcr & BCR_WPD; +} + +static bool lpc_ich_bxt_set_writeable(void __iomem *base, void *data) +{ + unsigned int spi = PCI_DEVFN(13, 2); + struct pci_bus *bus = data; + u32 bcr; + + pci_bus_read_config_dword(bus, spi, BCR, &bcr); + if (!(bcr & BCR_WPD)) { + bcr |= BCR_WPD; + pci_bus_write_config_dword(bus, spi, BCR, bcr); + pci_bus_read_config_dword(bus, spi, BCR, &bcr); + } + + return bcr & BCR_WPD; +} + static int lpc_ich_init_spi(struct pci_dev *dev) { struct lpc_ich_priv *priv = pci_get_drvdata(dev); struct resource *res = &intel_spi_res[0]; struct intel_spi_boardinfo *info; - u32 spi_base, rcba, bcr; + u32 spi_base, rcba; info = devm_kzalloc(&dev->dev, sizeof(*info), GFP_KERNEL); if (!info) @@ -1102,6 +1149,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev) if (spi_base & SPIBASE_BYT_EN) { res->start = spi_base & ~(SPIBASE_BYT_SZ - 1); res->end = res->start + SPIBASE_BYT_SZ - 1; + + info->set_writeable = lpc_ich_byt_set_writeable; } break; @@ -1112,8 +1161,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev) res->start = spi_base + SPIBASE_LPT; res->end = res->start + SPIBASE_LPT_SZ - 1; - pci_read_config_dword(dev, BCR, &bcr); - info->writeable = !!(bcr & BCR_WPD); + info->set_writeable = lpc_ich_lpt_set_writeable; + info->data = dev; } break; @@ -1134,8 +1183,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev) res->start = spi_base & 0xfffffff0; res->end = res->start + SPIBASE_APL_SZ - 1; - pci_bus_read_config_dword(bus, spi, BCR, &bcr); - info->writeable = !!(bcr & BCR_WPD); + info->set_writeable = lpc_ich_bxt_set_writeable; + info->data = bus; } pci_bus_write_config_byte(bus, p2sb, 0xe1, 0x1); diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c index 555fe55d14ae..8a3c1f3c2d2e 100644 --- 
a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c +++ b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c @@ -16,12 +16,30 @@ #define BCR 0xdc #define BCR_WPD BIT(0) +static bool intel_spi_pci_set_writeable(void __iomem *base, void *data) +{ + struct pci_dev *pdev = data; + u32 bcr; + + /* Try to make the chip read/write */ + pci_read_config_dword(pdev, BCR, &bcr); + if (!(bcr & BCR_WPD)) { + bcr |= BCR_WPD; + pci_write_config_dword(pdev, BCR, bcr); + pci_read_config_dword(pdev, BCR, &bcr); + } + + return bcr & BCR_WPD; +} + static const struct intel_spi_boardinfo bxt_info = { .type = INTEL_SPI_BXT, + .set_writeable = intel_spi_pci_set_writeable, }; static const struct intel_spi_boardinfo cnl_info = { .type = INTEL_SPI_CNL, + .set_writeable = intel_spi_pci_set_writeable, }; static int intel_spi_pci_probe(struct pci_dev *pdev, @@ -29,7 +47,6 @@ static int intel_spi_pci_probe(struct pci_dev *pdev, { struct intel_spi_boardinfo *info; struct intel_spi *ispi; - u32 bcr; int ret; ret = pcim_enable_device(pdev); @@ -41,15 +58,7 @@ static int intel_spi_pci_probe(struct pci_dev *pdev, if (!info) return -ENOMEM; - /* Try to make the chip read/write */ - pci_read_config_dword(pdev, BCR, &bcr); - if (!(bcr & BCR_WPD)) { - bcr |= BCR_WPD; - pci_write_config_dword(pdev, BCR, bcr); - pci_read_config_dword(pdev, BCR, &bcr); - } - info->writeable = !!(bcr & BCR_WPD); - + info->data = pdev; ispi = intel_spi_probe(&pdev->dev, &pdev->resource[0], info); if (IS_ERR(ispi)) return PTR_ERR(ispi); diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.c b/drivers/mtd/spi-nor/controllers/intel-spi.c index b4b0affd16c8..65f41c0781bf 100644 --- a/drivers/mtd/spi-nor/controllers/intel-spi.c +++ b/drivers/mtd/spi-nor/controllers/intel-spi.c @@ -132,7 +132,6 @@ * @sregs: Start of software sequencer registers * @nregions: Maximum number of regions * @pr_num: Maximum number of protected range registers - * @writeable: Is the chip writeable * @locked: Is SPI setting locked * @swseq_reg: Use SW 
sequencer in register reads/writes * @swseq_erase: Use SW sequencer in erase operation @@ -150,7 +149,6 @@ struct intel_spi { void __iomem *sregs; size_t nregions; size_t pr_num; - bool writeable; bool locked; bool swseq_reg; bool swseq_erase; @@ -305,6 +303,14 @@ static int intel_spi_wait_sw_busy(struct intel_spi *ispi) INTEL_SPI_TIMEOUT * 1000); } +static bool intel_spi_set_writeable(struct intel_spi *ispi) +{ + if (!ispi->info->set_writeable) + return false; + + return ispi->info->set_writeable(ispi->base, ispi->info->data); +} + static int intel_spi_init(struct intel_spi *ispi) { u32 opmenu0, opmenu1, lvscc, uvscc, val; @@ -317,19 +323,6 @@ static int intel_spi_init(struct intel_spi *ispi) ispi->nregions = BYT_FREG_NUM; ispi->pr_num = BYT_PR_NUM; ispi->swseq_reg = true; - - if (writeable) { - /* Disable write protection */ - val = readl(ispi->base + BYT_BCR); - if (!(val & BYT_BCR_WPD)) { - val |= BYT_BCR_WPD; - writel(val, ispi->base + BYT_BCR); - val = readl(ispi->base + BYT_BCR); - } - - ispi->writeable = !!(val & BYT_BCR_WPD); - } - break; case INTEL_SPI_LPT: @@ -359,6 +352,12 @@ static int intel_spi_init(struct intel_spi *ispi) return -EINVAL; } + /* Try to disable write protection if user asked to do so */ + if (writeable && !intel_spi_set_writeable(ispi)) { + dev_warn(ispi->dev, "can't disable chip write protection\n"); + writeable = false; + } + /* Disable #SMI generation from HW sequencer */ val = readl(ispi->base + HSFSTS_CTL); val &= ~HSFSTS_CTL_FSMIE; @@ -885,9 +884,12 @@ static void intel_spi_fill_partition(struct intel_spi *ispi, /* * If any of the regions have protection bits set, make the * whole partition read-only to be on the safe side. + * + * Also if the user did not ask the chip to be writeable + * mask the bit too. 
*/ - if (intel_spi_is_protected(ispi, base, limit)) - ispi->writeable = false; + if (!writeable || intel_spi_is_protected(ispi, base, limit)) + part->mask_flags |= MTD_WRITEABLE; end = (limit << 12) + 4096; if (end > part->size) @@ -928,7 +930,6 @@ struct intel_spi *intel_spi_probe(struct device *dev, ispi->dev = dev; ispi->info = info; - ispi->writeable = info->writeable; ret = intel_spi_init(ispi); if (ret) @@ -946,10 +947,6 @@ struct intel_spi *intel_spi_probe(struct device *dev, intel_spi_fill_partition(ispi, &part); - /* Prevent writes if not explicitly enabled */ - if (!ispi->writeable || !writeable) - ispi->nor.mtd.flags &= ~MTD_WRITEABLE; - ret = mtd_device_register(&ispi->nor.mtd, &part, 1); if (ret) return ERR_PTR(ret); diff --git a/include/linux/platform_data/intel-spi.h b/include/linux/platform_data/intel-spi.h index 7f53a5c6f35e..7dda3f690465 100644 --- a/include/linux/platform_data/intel-spi.h +++ b/include/linux/platform_data/intel-spi.h @@ -19,11 +19,13 @@ enum intel_spi_type { /** * struct intel_spi_boardinfo - Board specific data for Intel SPI driver * @type: Type which this controller is compatible with - * @writeable: The chip is writeable + * @set_writeable: Try to make the chip writeable (optional) + * @data: Data to be passed to @set_writeable can be %NULL */ struct intel_spi_boardinfo { enum intel_spi_type type; - bool writeable; + bool (*set_writeable)(void __iomem *base, void *data); + void *data; }; #endif /* INTEL_SPI_PDATA_H */ From 65ac4d1807d2dd4a77833902b1d4c2e73edf7d37 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 25 Oct 2022 09:28:00 +0300 Subject: [PATCH 0806/1477] spi: intel: Use correct mask for flash and protected regions [ Upstream commit 92a66cbf6b30eda5719fbdfb24cd15fb341bba32 ] The flash and protected region mask is actually 0x7fff (30:16 and 14:0) and not 0x3fff so fix this accordingly. While there use GENMASK() instead. 
Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Link: https://lore.kernel.org/r/20221025062800.22357-1-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/mtd/spi-nor/controllers/intel-spi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.c b/drivers/mtd/spi-nor/controllers/intel-spi.c index 65f41c0781bf..6c802db6b4af 100644 --- a/drivers/mtd/spi-nor/controllers/intel-spi.c +++ b/drivers/mtd/spi-nor/controllers/intel-spi.c @@ -53,17 +53,17 @@ #define FRACC 0x50 #define FREG(n) (0x54 + ((n) * 4)) -#define FREG_BASE_MASK 0x3fff +#define FREG_BASE_MASK GENMASK(14, 0) #define FREG_LIMIT_SHIFT 16 -#define FREG_LIMIT_MASK (0x03fff << FREG_LIMIT_SHIFT) +#define FREG_LIMIT_MASK GENMASK(30, 16) /* Offset is from @ispi->pregs */ #define PR(n) ((n) * 4) #define PR_WPE BIT(31) #define PR_LIMIT_SHIFT 16 -#define PR_LIMIT_MASK (0x3fff << PR_LIMIT_SHIFT) +#define PR_LIMIT_MASK GENMASK(30, 16) #define PR_RPE BIT(15) -#define PR_BASE_MASK 0x3fff +#define PR_BASE_MASK GENMASK(14, 0) /* Offsets are from @ispi->sregs */ #define SSFSTS_CTL 0x00 From 7daab001a6f618a1ac797a6ec40f231e8666d93e Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Tue, 8 Nov 2022 15:45:03 +0800 Subject: [PATCH 0807/1477] mmc: sdhci-esdhc-imx: use the correct host caps for MMC_CAP_8_BIT_DATA [ Upstream commit f002f45a00ee14214d96b18b9a555fe2c56afb20 ] MMC_CAP_8_BIT_DATA belongs to struct mmc_host, not struct sdhci_host. So correct it here. 
Fixes: 1ed5c3b22fc7 ("mmc: sdhci-esdhc-imx: Propagate ESDHC_FLAG_HS400* only on 8bit bus") Signed-off-by: Haibo Chen Cc: stable@vger.kernel.org Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1667893503-20583-1-git-send-email-haibo.chen@nxp.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-esdhc-imx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 449562122adc..1f1bdd34dd55 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1621,14 +1621,14 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) if (imx_data->socdata->flags & ESDHC_FLAG_ERR004536) host->quirks |= SDHCI_QUIRK_BROKEN_ADMA; - if (host->caps & MMC_CAP_8_BIT_DATA && + if (host->mmc->caps & MMC_CAP_8_BIT_DATA && imx_data->socdata->flags & ESDHC_FLAG_HS400) host->quirks2 |= SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400; if (imx_data->socdata->flags & ESDHC_FLAG_BROKEN_AUTO_CMD23) host->quirks2 |= SDHCI_QUIRK2_ACMD23_BROKEN; - if (host->caps & MMC_CAP_8_BIT_DATA && + if (host->mmc->caps & MMC_CAP_8_BIT_DATA && imx_data->socdata->flags & ESDHC_FLAG_HS400_ES) { host->mmc->caps2 |= MMC_CAP2_HS400_ES; host->mmc_host_ops.hs400_enhanced_strobe = From 6958556285ec640e73fea18ed48b2514ebd2359e Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 7 Dec 2020 16:21:03 +0800 Subject: [PATCH 0808/1477] drm/amd/pm: support power source switch on Sienna Cichlid [ Upstream commit 18a4b3de5fc1c63c80e3be0673886431a56e4307 ] Enable power source switch on Sienna Cichlid. 
Signed-off-by: Evan Quan Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Stable-dep-of: 0c85c067c9d9 ("drm/amdgpu: disable BACO on special BEIGE_GOBY card") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 49d7fa1d0842..834ac633281c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -301,6 +301,9 @@ static int sienna_cichlid_check_powerplay_table(struct smu_context *smu) table_context->power_play_table; struct smu_baco_context *smu_baco = &smu->smu_baco; + if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_HARDWAREDC) + smu->dc_controlled_by_gpio = true; + if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_BACO || powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_MACO) smu_baco->platform_support = true; @@ -2806,6 +2809,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .get_dpm_ultimate_freq = sienna_cichlid_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .run_btc = sienna_cichlid_run_btc, + .set_power_source = smu_v11_0_set_power_source, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_pp_feature_mask = smu_cmn_set_pp_feature_mask, .get_gpu_metrics = sienna_cichlid_get_gpu_metrics, From b0faeff69a0a6829642b7d3bf06bcc103f8aae39 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 4 Jun 2021 15:33:48 +0800 Subject: [PATCH 0809/1477] drm/amd/pm: Read BIF STRAP also for BACO check [ Upstream commit 458020dd4f7109693d4857ed320398e662e8899a ] Avoid reading BIF STRAP each time for BACO capability. Read the STRAP value while checking BACO capability in PPTable. 
Signed-off-by: Lijo Lazar Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Stable-dep-of: 0c85c067c9d9 ("drm/amdgpu: disable BACO on special BEIGE_GOBY card") Signed-off-by: Sasha Levin --- .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 25 ++++++++++++----- .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 27 ++++++++++++++----- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 27 ++++++++++++++----- 3 files changed, 59 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 1c526cb239e0..3a31058b029e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -379,16 +379,31 @@ static int arcturus_set_default_dpm_table(struct smu_context *smu) return 0; } -static int arcturus_check_powerplay_table(struct smu_context *smu) +static void arcturus_check_bxco_support(struct smu_context *smu) { struct smu_table_context *table_context = &smu->smu_table; struct smu_11_0_powerplay_table *powerplay_table = table_context->power_play_table; struct smu_baco_context *smu_baco = &smu->smu_baco; + struct amdgpu_device *adev = smu->adev; + uint32_t val; if (powerplay_table->platform_caps & SMU_11_0_PP_PLATFORM_CAP_BACO || - powerplay_table->platform_caps & SMU_11_0_PP_PLATFORM_CAP_MACO) - smu_baco->platform_support = true; + powerplay_table->platform_caps & SMU_11_0_PP_PLATFORM_CAP_MACO) { + val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0); + smu_baco->platform_support = + (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? 
true : + false; + } +} + +static int arcturus_check_powerplay_table(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + struct smu_11_0_powerplay_table *powerplay_table = + table_context->power_play_table; + + arcturus_check_bxco_support(smu); table_context->thermal_controller_type = powerplay_table->thermal_controller_type; @@ -2131,13 +2146,11 @@ static void arcturus_get_unique_id(struct smu_context *smu) static bool arcturus_is_baco_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - uint32_t val; if (!smu_v11_0_baco_is_support(smu) || amdgpu_sriov_vf(adev)) return false; - val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0); - return (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? true : false; + return true; } static int arcturus_set_df_cstate(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 2937784bc824..a7773b6453d5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -338,19 +338,34 @@ navi10_get_allowed_feature_mask(struct smu_context *smu, return 0; } -static int navi10_check_powerplay_table(struct smu_context *smu) +static void navi10_check_bxco_support(struct smu_context *smu) { struct smu_table_context *table_context = &smu->smu_table; struct smu_11_0_powerplay_table *powerplay_table = table_context->power_play_table; struct smu_baco_context *smu_baco = &smu->smu_baco; + struct amdgpu_device *adev = smu->adev; + uint32_t val; + + if (powerplay_table->platform_caps & SMU_11_0_PP_PLATFORM_CAP_BACO || + powerplay_table->platform_caps & SMU_11_0_PP_PLATFORM_CAP_MACO) { + val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0); + smu_baco->platform_support = + (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? 
true : + false; + } +} + +static int navi10_check_powerplay_table(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + struct smu_11_0_powerplay_table *powerplay_table = + table_context->power_play_table; if (powerplay_table->platform_caps & SMU_11_0_PP_PLATFORM_CAP_HARDWAREDC) smu->dc_controlled_by_gpio = true; - if (powerplay_table->platform_caps & SMU_11_0_PP_PLATFORM_CAP_BACO || - powerplay_table->platform_caps & SMU_11_0_PP_PLATFORM_CAP_MACO) - smu_baco->platform_support = true; + navi10_check_bxco_support(smu); table_context->thermal_controller_type = powerplay_table->thermal_controller_type; @@ -1948,13 +1963,11 @@ static int navi10_overdrive_get_gfx_clk_base_voltage(struct smu_context *smu, static bool navi10_is_baco_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - uint32_t val; if (amdgpu_sriov_vf(adev) || (!smu_v11_0_baco_is_support(smu))) return false; - val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0); - return (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? 
true : false; + return true; } static int navi10_set_default_od_settings(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 834ac633281c..def32b6897f9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -294,19 +294,34 @@ sienna_cichlid_get_allowed_feature_mask(struct smu_context *smu, return 0; } -static int sienna_cichlid_check_powerplay_table(struct smu_context *smu) +static void sienna_cichlid_check_bxco_support(struct smu_context *smu) { struct smu_table_context *table_context = &smu->smu_table; struct smu_11_0_7_powerplay_table *powerplay_table = table_context->power_play_table; struct smu_baco_context *smu_baco = &smu->smu_baco; + struct amdgpu_device *adev = smu->adev; + uint32_t val; + + if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_BACO || + powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_MACO) { + val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0); + smu_baco->platform_support = + (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? 
true : + false; + } +} + +static int sienna_cichlid_check_powerplay_table(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + struct smu_11_0_7_powerplay_table *powerplay_table = + table_context->power_play_table; if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_HARDWAREDC) smu->dc_controlled_by_gpio = true; - if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_BACO || - powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_MACO) - smu_baco->platform_support = true; + sienna_cichlid_check_bxco_support(smu); table_context->thermal_controller_type = powerplay_table->thermal_controller_type; @@ -1739,13 +1754,11 @@ static int sienna_cichlid_run_btc(struct smu_context *smu) static bool sienna_cichlid_is_baco_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - uint32_t val; if (amdgpu_sriov_vf(adev) || (!smu_v11_0_baco_is_support(smu))) return false; - val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0); - return (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? true : false; + return true; } static bool sienna_cichlid_is_mode1_reset_supported(struct smu_context *smu) From f3adf0adf306e82c343206f5cfa87cccf3ca6f82 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Wed, 7 Sep 2022 20:31:36 +0800 Subject: [PATCH 0810/1477] drm/amd/pm: disable BACO entry/exit completely on several sienna cichlid cards [ Upstream commit 7bb91228291aa95bfee3b9d5710887673711c74c ] To avoid hardware intermittent failures. 
Signed-off-by: Guchun Chen Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Stable-dep-of: 0c85c067c9d9 ("drm/amdgpu: disable BACO on special BEIGE_GOBY card") Signed-off-by: Sasha Levin --- .../gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index def32b6897f9..91026d0c1c79 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -309,6 +309,17 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu) smu_baco->platform_support = (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? true : false; + + /* + * Disable BACO entry/exit completely on below SKUs to + * avoid hardware intermittent failures. + */ + if (((adev->pdev->device == 0x73A1) && + (adev->pdev->revision == 0x00)) || + ((adev->pdev->device == 0x73BF) && + (adev->pdev->revision == 0xCF))) + smu_baco->platform_support = false; + } } From a180da5564b5ad0270b28b85fc946b06c11058b5 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Mon, 7 Nov 2022 16:46:59 +0800 Subject: [PATCH 0811/1477] drm/amdgpu: disable BACO on special BEIGE_GOBY card [ Upstream commit 0c85c067c9d9d7a1b2cc2e01a236d5d0d4a872b5 ] Still avoid intermittent failure. 
Signed-off-by: Guchun Chen Reviewed-by: Lijo Lazar Acked-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 91026d0c1c79..45c815262200 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -317,7 +317,9 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu) if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) || ((adev->pdev->device == 0x73BF) && - (adev->pdev->revision == 0xCF))) + (adev->pdev->revision == 0xCF)) || + ((adev->pdev->device == 0x7422) && + (adev->pdev->revision == 0x00))) smu_baco->platform_support = false; } From 38ca9bd336c8affd46a33b944ad2b33cecfbd476 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 18 Oct 2022 20:35:13 +0200 Subject: [PATCH 0812/1477] spi: stm32: Print summary 'callbacks suppressed' message [ Upstream commit 195583504be28df5d608a4677dd796117aea875f ] The original fix "spi: stm32: Rate-limit the 'Communication suspended' message" still leads to "stm32h7_spi_irq_thread: 1696 callbacks suppressed" spew in the kernel log. Since this 'Communication suspended' message is a debug print, add RATELIMIT_MSG_ON_RELEASE flag to inhibit the "callbacks suppressed" part during normal operation and only print summary at the end.
Fixes: ea8be08cc9358 ("spi: stm32: Rate-limit the 'Communication suspended' message") Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20221018183513.206706-1-marex@denx.de Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-stm32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index a6dfc8fef20c..651a6510fb54 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -941,6 +941,7 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL * 10, 1); + ratelimit_set_flags(&rs, RATELIMIT_MSG_ON_RELEASE); if (__ratelimit(&rs)) dev_dbg_ratelimited(spi->dev, "Communication suspended\n"); if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0))) From 8d21554ec7680e9585fb852d933203c3db60dad1 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Fri, 28 Oct 2022 11:16:03 +0800 Subject: [PATCH 0813/1477] ASoC: core: Fix use-after-free in snd_soc_exit() [ Upstream commit 6ec27c53886c8963729885bcf2dd996eba2767a7 ] KASAN reports a use-after-free: BUG: KASAN: use-after-free in device_del+0xb5b/0xc60 Read of size 8 at addr ffff888008655050 by task rmmod/387 CPU: 2 PID: 387 Comm: rmmod Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Call Trace: dump_stack_lvl+0x79/0x9a print_report+0x17f/0x47b kasan_report+0xbb/0xf0 device_del+0xb5b/0xc60 platform_device_del.part.0+0x24/0x200 platform_device_unregister+0x2e/0x40 snd_soc_exit+0xa/0x22 [snd_soc_core] __do_sys_delete_module.constprop.0+0x34f/0x5b0 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd ... It's because in snd_soc_init(), snd_soc_util_init() can fail, but its return value is ignored, which causes soc_dummy_dev to be unregistered twice. snd_soc_init() snd_soc_util_init() platform_device_register_simple(soc_dummy_dev) platform_driver_register() # fail platform_device_unregister(soc_dummy_dev) platform_driver_register() # success ...
snd_soc_exit() snd_soc_util_exit() # soc_dummy_dev will be unregistered for the second time To fix it, handle the error and stop snd_soc_init() when util_init() fails. Also clean debugfs when util_init() or driver_register() fails. Fixes: fb257897bf20 ("ASoC: Work around allmodconfig failure") Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221028031603.59416-1-chenzhongjin@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-core.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index a6d6d10cd471..e9da95ebccc8 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3178,10 +3178,23 @@ EXPORT_SYMBOL_GPL(snd_soc_of_get_dai_link_codecs); static int __init snd_soc_init(void) { - snd_soc_debugfs_init(); - snd_soc_util_init(); + int ret; - return platform_driver_register(&soc_driver); + snd_soc_debugfs_init(); + ret = snd_soc_util_init(); + if (ret) + goto err_util_init; + + ret = platform_driver_register(&soc_driver); + if (ret) + goto err_register; + return 0; + +err_register: + snd_soc_util_exit(); +err_util_init: + snd_soc_debugfs_exit(); + return ret; } module_init(snd_soc_init); From 9e82d78fbe54f5ca02e0397271a8b5207ccb07b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 27 Oct 2022 11:57:58 +0200 Subject: [PATCH 0814/1477] ASoC: tas2770: Fix set_tdm_slot in case of single slot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e59bf547a7dd366f93bfebb7487959580ca6c0ec ] There's a special branch in the set_tdm_slot op for the case of nslots being 1, but: (1) That branch can never work (there's a check for tx_mask being non-zero, later there's another check for it *being* zero; one or the other always throws -EINVAL). (2) The intention of the branch seems to be what the general other branch reduces to in case of nslots being 1.
For those reasons remove the 'nslots being 1' special case. Fixes: 1a476abc723e ("tas2770: add tas2770 smart PA kernel driver") Suggested-by: Jos Dehaes Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20221027095800.16094-1-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2770.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 171bbcc919d5..c213c8096142 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -395,21 +395,13 @@ static int tas2770_set_dai_tdm_slot(struct snd_soc_dai *dai, if (tx_mask == 0 || rx_mask != 0) return -EINVAL; - if (slots == 1) { - if (tx_mask != 1) - return -EINVAL; - - left_slot = 0; - right_slot = 0; + left_slot = __ffs(tx_mask); + tx_mask &= ~(1 << left_slot); + if (tx_mask == 0) { + right_slot = left_slot; } else { - left_slot = __ffs(tx_mask); - tx_mask &= ~(1 << left_slot); - if (tx_mask == 0) { - right_slot = left_slot; - } else { - right_slot = __ffs(tx_mask); - tx_mask &= ~(1 << right_slot); - } + right_slot = __ffs(tx_mask); + tx_mask &= ~(1 << right_slot); } if (tx_mask != 0 || left_slot >= slots || right_slot >= slots) From f5dedad4059b99ad3f6ce9f9c74c0cf1ea3cb1ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 27 Oct 2022 11:57:59 +0200 Subject: [PATCH 0815/1477] ASoC: tas2764: Fix set_tdm_slot in case of single slot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit faac764ea1ea6898d93e46c403271fb105c0906e ] There's a special branch in the set_tdm_slot op for the case of nslots being 1, but: (1) That branch can never work (there's a check for tx_mask being non-zero, later there's another check for it *being* zero; one or the other always throws -EINVAL). 
(2) The intention of the branch seems to be what the general other branch reduces to in case of nslots being 1. For those reasons remove the 'nslots being 1' special case. Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Suggested-by: Jos Dehaes Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20221027095800.16094-2-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 8b262e7f5275..c8f6f5122cac 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -386,20 +386,13 @@ static int tas2764_set_dai_tdm_slot(struct snd_soc_dai *dai, if (tx_mask == 0 || rx_mask != 0) return -EINVAL; - if (slots == 1) { - if (tx_mask != 1) - return -EINVAL; - left_slot = 0; - right_slot = 0; + left_slot = __ffs(tx_mask); + tx_mask &= ~(1 << left_slot); + if (tx_mask == 0) { + right_slot = left_slot; } else { - left_slot = __ffs(tx_mask); - tx_mask &= ~(1 << left_slot); - if (tx_mask == 0) { - right_slot = left_slot; - } else { - right_slot = __ffs(tx_mask); - tx_mask &= ~(1 << right_slot); - } + right_slot = __ffs(tx_mask); + tx_mask &= ~(1 << right_slot); } if (tx_mask != 0 || left_slot >= slots || right_slot >= slots) From 85cdbf04b435d40db127869afb6611f13df9a02c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 6 Jun 2022 13:04:33 +0300 Subject: [PATCH 0816/1477] serial: 8250: Remove serial_rs485 sanitization from em485 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 84f2faa7852e1f55d89bb0c99b3a672b87b11f87 ] Serial core handles serial_rs485 sanitization. When em485 init fails, there are two possible paths of entry: 1) uart_rs485_config (init path) that fully clears port->rs485 on error. 
2) ioctl path with a pre-existing, valid port->rs485 unto which the kernel falls back on error and port->rs485 should therefore be kept untouched. The temporary rs485 struct is not returned to userspace in case of error so its flags don't matter. ...Thus SER_RS485_ENABLED clearing on error can/should be dropped. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220606100433.13793-37-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 93810191f5d2 ("serial: 8250: omap: Fix missing PM runtime calls for omap8250_set_mctrl()") Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_port.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index f648fd1d7548..7cdfc2458d36 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -661,13 +661,6 @@ int serial8250_em485_config(struct uart_port *port, struct serial_rs485 *rs485) rs485->flags &= ~SER_RS485_RTS_AFTER_SEND; } - /* clamp the delays to [0, 100ms] */ - rs485->delay_rts_before_send = min(rs485->delay_rts_before_send, 100U); - rs485->delay_rts_after_send = min(rs485->delay_rts_after_send, 100U); - - memset(rs485->padding, 0, sizeof(rs485->padding)); - port->rs485 = *rs485; - gpiod_set_value(port->rs485_term_gpio, rs485->flags & SER_RS485_TERMINATE_BUS); @@ -675,15 +668,8 @@ int serial8250_em485_config(struct uart_port *port, struct serial_rs485 *rs485) * Both serial8250_em485_init() and serial8250_em485_destroy() * are idempotent.
*/ - if (rs485->flags & SER_RS485_ENABLED) { - int ret = serial8250_em485_init(up); - - if (ret) { - rs485->flags &= ~SER_RS485_ENABLED; - port->rs485.flags &= ~SER_RS485_ENABLED; - } - return ret; - } + if (rs485->flags & SER_RS485_ENABLED) + return serial8250_em485_init(up); serial8250_em485_destroy(up); return 0; From f14c312c2189a39b5b720da7ec783f9f1daf5112 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 24 Oct 2022 09:36:13 +0300 Subject: [PATCH 0817/1477] serial: 8250: omap: Fix missing PM runtime calls for omap8250_set_mctrl() [ Upstream commit 93810191f5d23652c0b8a1a9b3a4a89d6fd5063e ] There are cases where omap8250_set_mctrl() may get called after the UART has already autoidled causing an asynchronous external abort. This can happen on ttyport_open(): mem_serial_in from omap8250_set_mctrl+0x38/0xa0 omap8250_set_mctrl from uart_update_mctrl+0x4c/0x58 uart_update_mctrl from uart_dtr_rts+0x60/0xa8 uart_dtr_rts from tty_port_block_til_ready+0xd0/0x2a8 tty_port_block_til_ready from uart_open+0x14/0x1c uart_open from ttyport_open+0x64/0x148 And on ttyport_close(): omap8250_set_mctrl from uart_update_mctrl+0x3c/0x48 uart_update_mctrl from uart_dtr_rts+0x54/0x9c uart_dtr_rts from tty_port_shutdown+0x78/0x9c tty_port_shutdown from tty_port_close+0x3c/0x74 tty_port_close from ttyport_close+0x40/0x58 It can also happen on disassociate_ctty() calling uart_shutdown() that ends up calling omap8250_set_mctrl(). Let's fix the issue by adding missing PM runtime calls to omap8250_set_mctrl(). To do this, we need to add __omap8250_set_mctrl() that can be called from both omap8250_set_mctrl(), and from runtime PM resume path when restoring the registers. 
Fixes: 61929cf0169d ("tty: serial: Add 8250-core based omap driver") Reported-by: Merlijn Wajer Reported-by: Romain Naour Reported-by: Ivaylo Dimitrov Tested-by: Ivaylo Dimitrov Signed-off-by: Tony Lindgren Depends-on: dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") Link: https://lore.kernel.org/r/20221024063613.25943-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_omap.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index f3744ac805ec..7c7cfd6d48d8 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -157,7 +157,11 @@ static u32 uart_read(struct uart_8250_port *up, u32 reg) return readl(up->port.membase + (reg << up->port.regshift)); } -static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) +/* + * Called on runtime PM resume path from omap8250_restore_regs(), and + * omap8250_set_mctrl(). 
+ */ +static void __omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) { struct uart_8250_port *up = up_to_u8250p(port); struct omap8250_priv *priv = up->port.private_data; @@ -181,6 +185,20 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) } } +static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + int err; + + err = pm_runtime_resume_and_get(port->dev); + if (err) + return; + + __omap8250_set_mctrl(port, mctrl); + + pm_runtime_mark_last_busy(port->dev); + pm_runtime_put_autosuspend(port->dev); +} + /* * Work Around for Errata i202 (2430, 3430, 3630, 4430 and 4460) * The access to uart register after MDR1 Access @@ -341,7 +359,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) omap8250_update_mdr1(up, priv); - up->port.ops->set_mctrl(&up->port, up->port.mctrl); + __omap8250_set_mctrl(&up->port, up->port.mctrl); if (up->port.rs485.flags & SER_RS485_ENABLED) serial8250_em485_stop_tx(up); From b0b6ea651ecf1fc17d4841d42b37dede1195841b Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 13 Oct 2022 13:23:39 +0200 Subject: [PATCH 0818/1477] serial: 8250_omap: remove wait loop from Errata i202 workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e828e56684d61b17317e0cfdef83791fa61cb76b ] We were occasionally seeing the "Errata i202: timedout" on an AM335x board when repeatedly opening and closing a UART connected to an active sender. As new input may arrive at any time, it is possible to miss the "RX FIFO empty" condition, forcing the loop to wait until it times out. Nothing in the i202 Advisory states that such a wait is even necessary; other FIFO clear functions like serial8250_clear_fifos() do not wait either. For this reason, it seems safe to remove the wait, fixing the mentioned issue. 
Fixes: 61929cf0169d ("tty: serial: Add 8250-core based omap driver") Reviewed-by: Ilpo Järvinen Signed-off-by: Matthias Schiffer Link: https://lore.kernel.org/r/20221013112339.2540767-1-matthias.schiffer@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_omap.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 7c7cfd6d48d8..655553a3c78a 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -211,27 +211,10 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) static void omap_8250_mdr1_errataset(struct uart_8250_port *up, struct omap8250_priv *priv) { - u8 timeout = 255; - serial_out(up, UART_OMAP_MDR1, priv->mdr1); udelay(2); serial_out(up, UART_FCR, up->fcr | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); - /* - * Wait for FIFO to empty: when empty, RX_FIFO_E bit is 0 and - * TX_FIFO_E bit is 1. - */ - while (UART_LSR_THRE != (serial_in(up, UART_LSR) & - (UART_LSR_THRE | UART_LSR_DR))) { - timeout--; - if (!timeout) { - /* Should *never* happen. we warn and carry on */ - dev_crit(up->port.dev, "Errata i202: timedout %x\n", - serial_in(up, UART_LSR)); - break; - } - udelay(1); - } } static void omap_8250_get_divisor(struct uart_port *port, unsigned int baud, From d833cba201adf9237168e19f0d76e4d7aa69f303 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 28 Oct 2022 13:58:13 +0300 Subject: [PATCH 0819/1477] serial: 8250: omap: Fix unpaired pm_runtime_put_sync() in omap8250_remove() [ Upstream commit e3f0c638f428fd66b5871154b62706772045f91a ] On remove, we get an error for "Runtime PM usage count underflow!". I guess this driver is mostly built-in, and this issue has gone unnoticed for a while. Somehow I did not catch this issue with my earlier fix done with commit 4e0f5cc65098 ("serial: 8250_omap: Fix probe and remove for PM runtime"). 
Fixes: 4e0f5cc65098 ("serial: 8250_omap: Fix probe and remove for PM runtime") Signed-off-by: Tony Lindgren Depends-on: dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") Link: https://lore.kernel.org/r/20221028105813.54290-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_omap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 655553a3c78a..57524950b8dc 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1475,6 +1475,11 @@ err: static int omap8250_remove(struct platform_device *pdev) { struct omap8250_priv *priv = platform_get_drvdata(pdev); + int err; + + err = pm_runtime_resume_and_get(&pdev->dev); + if (err) + return err; pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_put_sync(&pdev->dev); From 7e1f908e65c56b06c2ccdc1c8c8034bbb1e2de62 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 28 Oct 2022 14:00:44 +0300 Subject: [PATCH 0820/1477] serial: 8250: omap: Flush PM QOS work on remove [ Upstream commit d0b68629bd2fb61e0171a62f2e8da3db322f5cf6 ] Rebinding 8250_omap in a loop will at some point produce a warning for kernel/power/qos.c:296 cpu_latency_qos_update_request() with error "cpu_latency_qos_update_request called for unknown object". Let's flush the possibly pending PM QOS work scheduled from omap8250_runtime_suspend() before we disable runtime PM. 
Fixes: 61929cf0169d ("tty: serial: Add 8250-core based omap driver") Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20221028110044.54719-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_omap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 57524950b8dc..3f7379f16a36 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1483,6 +1483,7 @@ static int omap8250_remove(struct platform_device *pdev) pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_put_sync(&pdev->dev); + flush_work(&priv->qos_work); pm_runtime_disable(&pdev->dev); serial8250_unregister_port(priv->line); cpu_latency_qos_remove_request(&priv->pm_qos_request); From 0a3160f4ffc70ee4bfa1521f698dace06e6091fd Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 12 Oct 2022 20:13:53 +0800 Subject: [PATCH 0821/1477] serial: imx: Add missing .thaw_noirq hook [ Upstream commit 4561d8008a467cb05ac632a215391d6b787f40aa ] The following warning is seen with non-console UART instance when system hibernates. [ 37.371969] ------------[ cut here ]------------ [ 37.376599] uart3_root_clk already disabled [ 37.380810] WARNING: CPU: 0 PID: 296 at drivers/clk/clk.c:952 clk_core_disable+0xa4/0xb0 ... 
[ 37.506986] Call trace: [ 37.509432] clk_core_disable+0xa4/0xb0 [ 37.513270] clk_disable+0x34/0x50 [ 37.516672] imx_uart_thaw+0x38/0x5c [ 37.520250] platform_pm_thaw+0x30/0x6c [ 37.524089] dpm_run_callback.constprop.0+0x3c/0xd4 [ 37.528972] device_resume+0x7c/0x160 [ 37.532633] dpm_resume+0xe8/0x230 [ 37.536036] hibernation_snapshot+0x288/0x430 [ 37.540397] hibernate+0x10c/0x2e0 [ 37.543798] state_store+0xc4/0xd0 [ 37.547203] kobj_attr_store+0x1c/0x30 [ 37.550953] sysfs_kf_write+0x48/0x60 [ 37.554619] kernfs_fop_write_iter+0x118/0x1ac [ 37.559063] new_sync_write+0xe8/0x184 [ 37.562812] vfs_write+0x230/0x290 [ 37.566214] ksys_write+0x68/0xf4 [ 37.569529] __arm64_sys_write+0x20/0x2c [ 37.573452] invoke_syscall.constprop.0+0x50/0xf0 [ 37.578156] do_el0_svc+0x11c/0x150 [ 37.581648] el0_svc+0x30/0x140 [ 37.584792] el0t_64_sync_handler+0xe8/0xf0 [ 37.588976] el0t_64_sync+0x1a0/0x1a4 [ 37.592639] ---[ end trace 56e22eec54676d75 ]--- On hibernating, pm core calls into related hooks in sequence like: .freeze .freeze_noirq .thaw_noirq .thaw With .thaw_noirq hook being absent, the clock will be disabled in an unbalanced call, which results in the warning above. imx_uart_freeze() clk_prepare_enable() imx_uart_suspend_noirq() clk_disable() imx_uart_thaw clk_disable_unprepare() Adding the missing .thaw_noirq hook as imx_uart_resume_noirq() will have the call sequence corrected as below and thus fix the warning.
imx_uart_freeze() clk_prepare_enable() imx_uart_suspend_noirq() clk_disable() imx_uart_resume_noirq() clk_enable() imx_uart_thaw clk_disable_unprepare() Fixes: 09df0b3464e5 ("serial: imx: fix endless loop during suspend") Reviewed-by: Martin Kaiser Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20221012121353.2346280-1-shawn.guo@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index cf3d53165776..164597e2e004 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2626,6 +2626,7 @@ static const struct dev_pm_ops imx_uart_pm_ops = { .suspend_noirq = imx_uart_suspend_noirq, .resume_noirq = imx_uart_resume_noirq, .freeze_noirq = imx_uart_suspend_noirq, + .thaw_noirq = imx_uart_resume_noirq, .restore_noirq = imx_uart_resume_noirq, .suspend = imx_uart_suspend, .resume = imx_uart_resume, From b8fe1a5aa7330590b9bc3a57d43f488711e14955 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Sun, 2 Oct 2022 12:07:09 +0800 Subject: [PATCH 0822/1477] tty: n_gsm: fix sleep-in-atomic-context bug in gsm_control_send [ Upstream commit 7b7dfe4833c70a11cdfa51b38705103bd31eddaa ] The function gsm_dlci_t1() is a timer handler that runs in an atomic context, but it calls "kzalloc(..., GFP_KERNEL)" that may sleep. As a result, the sleep-in-atomic-context bug will happen. The process is shown below: gsm_dlci_t1() gsm_dlci_open() gsm_modem_update() gsm_modem_upd_via_msc() gsm_control_send() kzalloc(sizeof(.., GFP_KERNEL) //may sleep This patch changes the gfp_t parameter of kzalloc() from GFP_KERNEL to GFP_ATOMIC in order to mitigate the bug. 
Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20221002040709.27849-1-duoming@zju.edu.cn Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/n_gsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index c91a3004931f..e85282825973 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1416,7 +1416,7 @@ static struct gsm_control *gsm_control_send(struct gsm_mux *gsm, unsigned int command, u8 *data, int clen) { struct gsm_control *ctrl = kzalloc(sizeof(struct gsm_control), - GFP_KERNEL); + GFP_ATOMIC); unsigned long flags; if (ctrl == NULL) return NULL; From e60f37a1d379c821c17b08f366412dce9ef3d99f Mon Sep 17 00:00:00 2001 From: Baisong Zhong Date: Wed, 2 Nov 2022 16:16:20 +0800 Subject: [PATCH 0823/1477] bpf, test_run: Fix alignment problem in bpf_prog_test_run_skb() [ Upstream commit d3fd203f36d46aa29600a72d57a1b61af80e4a25 ] We got a syzkaller problem because of aarch64 alignment fault if KFENCE enabled. When the size from user bpf program is an odd number, like 399, 407, etc, it will cause the struct skb_shared_info's unaligned access. 
As seen below: BUG: KFENCE: use-after-free read in __skb_clone+0x23c/0x2a0 net/core/skbuff.c:1032 Use-after-free read at 0xffff6254fffac077 (in kfence-#213): __lse_atomic_add arch/arm64/include/asm/atomic_lse.h:26 [inline] arch_atomic_add arch/arm64/include/asm/atomic.h:28 [inline] arch_atomic_inc include/linux/atomic-arch-fallback.h:270 [inline] atomic_inc include/asm-generic/atomic-instrumented.h:241 [inline] __skb_clone+0x23c/0x2a0 net/core/skbuff.c:1032 skb_clone+0xf4/0x214 net/core/skbuff.c:1481 ____bpf_clone_redirect net/core/filter.c:2433 [inline] bpf_clone_redirect+0x78/0x1c0 net/core/filter.c:2420 bpf_prog_d3839dd9068ceb51+0x80/0x330 bpf_dispatcher_nop_func include/linux/bpf.h:728 [inline] bpf_test_run+0x3c0/0x6c0 net/bpf/test_run.c:53 bpf_prog_test_run_skb+0x638/0xa7c net/bpf/test_run.c:594 bpf_prog_test_run kernel/bpf/syscall.c:3148 [inline] __do_sys_bpf kernel/bpf/syscall.c:4441 [inline] __se_sys_bpf+0xad0/0x1634 kernel/bpf/syscall.c:4381 kfence-#213: 0xffff6254fffac000-0xffff6254fffac196, size=407, cache=kmalloc-512 allocated by task 15074 on cpu 0 at 1342.585390s: kmalloc include/linux/slab.h:568 [inline] kzalloc include/linux/slab.h:675 [inline] bpf_test_init.isra.0+0xac/0x290 net/bpf/test_run.c:191 bpf_prog_test_run_skb+0x11c/0xa7c net/bpf/test_run.c:512 bpf_prog_test_run kernel/bpf/syscall.c:3148 [inline] __do_sys_bpf kernel/bpf/syscall.c:4441 [inline] __se_sys_bpf+0xad0/0x1634 kernel/bpf/syscall.c:4381 __arm64_sys_bpf+0x50/0x60 kernel/bpf/syscall.c:4381 To fix the problem, we adjust @size so that (@size + @headroom) is a multiple of SMP_CACHE_BYTES. So we make sure the struct skb_shared_info is aligned to a cache line.
Fixes: 1cf1cae963c2 ("bpf: introduce BPF_PROG_TEST_RUN command") Signed-off-by: Baisong Zhong Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Link: https://lore.kernel.org/bpf/20221102081620.1465154-1-zhongbaisong@huawei.com Signed-off-by: Sasha Levin --- net/bpf/test_run.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 2983e926fe3c..717b01ff9b2b 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -231,6 +231,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, if (user_size > size) return ERR_PTR(-EMSGSIZE); + size = SKB_DATA_ALIGN(size); data = kzalloc(size + headroom + tailroom, GFP_USER); if (!data) return ERR_PTR(-ENOMEM); From 7360e7c29d276631f2af15d68d138f7f9e72803a Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Mon, 31 Oct 2022 21:40:31 +0800 Subject: [PATCH 0824/1477] ASoC: soc-utils: Remove __exit for snd_soc_util_exit() [ Upstream commit 314d34fe7f0a5836cb0472950c1f17744b4efde8 ] snd_soc_util_exit() is called in __init snd_soc_init() for cleanup. 
Remove the __exit annotation for it to fix the build warning: WARNING: modpost: sound/soc/snd-soc-core.o: section mismatch in reference: init_module (section: .init.text) -> snd_soc_util_exit (section: .exit.text) Fixes: 6ec27c53886c ("ASoC: core: Fix use-after-free in snd_soc_exit()") Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221031134031.256511-1-chenzhongjin@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index f27f94ca064b..6b398ffabb02 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -171,7 +171,7 @@ int __init snd_soc_util_init(void) return ret; } -void __exit snd_soc_util_exit(void) +void snd_soc_util_exit(void) { platform_driver_unregister(&soc_dummy_driver); platform_device_unregister(soc_dummy_dev); From 0b4c259b63eaab592987c47f41f32e4e2f5d4fbe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 4 Nov 2022 17:45:15 -0400 Subject: [PATCH 0825/1477] sctp: remove the unnecessary sinfo_stream check in sctp_prsctp_prune_unsent [ Upstream commit 9f0b773210c27a8f5d98ddb2fc4ba60a42a3285f ] Since commit 5bbbbe32a431 ("sctp: introduce stream scheduler foundations"), sctp_stream_outq_migrate() has been called in sctp_stream_init/update to remove those chunks to streams higher than the new max. There is no longer a need to do such a check in sctp_prsctp_prune_unsent().
Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski Stable-dep-of: 2f201ae14ae0 ("sctp: clear out_curr if all frag chunks of current msg are pruned") Signed-off-by: Sasha Levin --- net/sctp/outqueue.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 3fd06a27105d..35d5532320f9 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -384,6 +384,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, { struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chk, *temp; + struct sctp_stream_out *sout; q->sched->unsched_all(&asoc->stream); @@ -398,12 +399,9 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, sctp_sched_dequeue_common(q, chk); asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; - if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) { - struct sctp_stream_out *streamout = - SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); - streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; - } + sout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); + sout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk); sctp_chunk_free(chk); From e27458b18b35caee4b27b37a4a9c503b93cae5cc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 4 Nov 2022 17:45:16 -0400 Subject: [PATCH 0826/1477] sctp: clear out_curr if all frag chunks of current msg are pruned [ Upstream commit 2f201ae14ae0f91dbf1cffea7bb1e29e81d4d108 ] A crash was reported by Zhen Chen: list_del corruption, ffffa035ddf01c18->next is NULL WARNING: CPU: 1 PID: 250682 at lib/list_debug.c:49 __list_del_entry_valid+0x59/0xe0 RIP: 0010:__list_del_entry_valid+0x59/0xe0 Call Trace: sctp_sched_dequeue_common+0x17/0x70 [sctp] sctp_sched_fcfs_dequeue+0x37/0x50 [sctp] sctp_outq_flush_data+0x85/0x360 [sctp] sctp_outq_uncork+0x77/0xa0 [sctp] sctp_cmd_interpreter.constprop.0+0x164/0x1450 [sctp] sctp_side_effects+0x37/0xe0 [sctp] 
sctp_do_sm+0xd0/0x230 [sctp] sctp_primitive_SEND+0x2f/0x40 [sctp] sctp_sendmsg_to_asoc+0x3fa/0x5c0 [sctp] sctp_sendmsg+0x3d5/0x440 [sctp] sock_sendmsg+0x5b/0x70 and in sctp_sched_fcfs_dequeue() it dequeued a chunk from stream out_curr outq while this outq was empty. Normally stream->out_curr must be set to NULL once all frag chunks of current msg are dequeued, as we can see in sctp_sched_dequeue_done(). However, in sctp_prsctp_prune_unsent() as it is not a proper dequeue, sctp_sched_dequeue_done() is not called to do this. This patch is to fix it by simply setting out_curr to NULL when the last frag chunk of current msg is dequeued from out_curr stream in sctp_prsctp_prune_unsent(). Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: Zhen Chen Tested-by: Caowangbao Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sctp/outqueue.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 35d5532320f9..83a89dcf75ed 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -403,6 +403,11 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, sout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); sout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + /* clear out_curr if all frag chunks are pruned */ + if (asoc->stream.out_curr == sout && + list_is_last(&chk->frag_list, &chk->msg->chunks)) + asoc->stream.out_curr = NULL; + msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk); sctp_chunk_free(chk); if (msg_len <= 0) From 58636b5ff3f654fd348ad217f9ac7a18201d4164 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 7 Nov 2022 23:39:44 +0300 Subject: [PATCH 0827/1477] block: sed-opal: kmalloc the cmd/resp buffers [ Upstream commit f829230dd51974c1f4478900ed30bb77ba530b40 ] In accordance with [1] the DMA-able memory buffers must be cacheline-aligned otherwise the cache writing-back and invalidation performed during the mapping may cause 
the adjacent data being lost. It's specifically required for the DMA-noncoherent platforms [2]. Seeing the opal_dev.{cmd,resp} buffers are implicitly used for DMAs in the NVME and SCSI/SD drivers in framework of the nvme_sec_submit() and sd_sec_submit() methods respectively they must be cacheline-aligned to prevent the denoted problem. One of the option to guarantee that is to kmalloc the buffers [2]. Let's explicitly allocate them then instead of embedding into the opal_dev structure instance. Note this fix was inspired by the commit c94b7f9bab22 ("nvme-hwmon: kmalloc the NVME SMART log buffer"). [1] Documentation/core-api/dma-api.rst [2] Documentation/core-api/dma-api-howto.rst Fixes: 455a7b238cd6 ("block: Add Sed-opal library") Signed-off-by: Serge Semin Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221107203944.31686-1-Sergey.Semin@baikalelectronics.ru Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/sed-opal.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/block/sed-opal.c b/block/sed-opal.c index daafadbb88ca..0ac5a4f3f226 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -88,8 +88,8 @@ struct opal_dev { u64 lowest_lba; size_t pos; - u8 cmd[IO_BUFFER_LENGTH]; - u8 resp[IO_BUFFER_LENGTH]; + u8 *cmd; + u8 *resp; struct parsed_resp parsed; size_t prev_d_len; @@ -2134,6 +2134,8 @@ void free_opal_dev(struct opal_dev *dev) return; clean_opal_dev(dev); + kfree(dev->resp); + kfree(dev->cmd); kfree(dev); } EXPORT_SYMBOL(free_opal_dev); @@ -2146,17 +2148,39 @@ struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv) if (!dev) return NULL; + /* + * Presumably DMA-able buffers must be cache-aligned. Kmalloc makes + * sure the allocated buffer is DMA-safe in that regard. 
+ */ + dev->cmd = kmalloc(IO_BUFFER_LENGTH, GFP_KERNEL); + if (!dev->cmd) + goto err_free_dev; + + dev->resp = kmalloc(IO_BUFFER_LENGTH, GFP_KERNEL); + if (!dev->resp) + goto err_free_cmd; + INIT_LIST_HEAD(&dev->unlk_lst); mutex_init(&dev->dev_lock); dev->data = data; dev->send_recv = send_recv; if (check_opal_support(dev) != 0) { pr_debug("Opal is not supported on this device\n"); - kfree(dev); - return NULL; + goto err_free_resp; } return dev; + +err_free_resp: + kfree(dev->resp); + +err_free_cmd: + kfree(dev->cmd); + +err_free_dev: + kfree(dev); + + return NULL; } EXPORT_SYMBOL(init_opal_dev); From 0679f571d3de82a3f59a3d758a32c76506aaa464 Mon Sep 17 00:00:00 2001 From: D Scott Phillips Date: Wed, 2 Nov 2022 09:01:06 -0700 Subject: [PATCH 0828/1477] arm64: Fix bit-shifting UB in the MIDR_CPU_MODEL() macro [ Upstream commit 8ec8490a1950efeccb00967698cf7cb2fcd25ca7 ] CONFIG_UBSAN_SHIFT with gcc-5 complains that the shifting of ARM_CPU_IMP_AMPERE (0xC0) into bits [31:24] by MIDR_CPU_MODEL() is undefined behavior. Well, sort of, it actually spells the error as: arch/arm64/kernel/proton-pack.c: In function 'spectre_bhb_loop_affected': arch/arm64/include/asm/cputype.h:44:2: error: initializer element is not constant (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ ^ This isn't an issue for other Implementor codes, as all the other codes have zero in the top bit and so are representable as a signed int. Cast the implementor code to unsigned in MIDR_CPU_MODEL to remove the undefined behavior. 
Fixes: 0e5d5ae837c8 ("arm64: Add AMPERE1 to the Spectre-BHB affected list") Reported-by: Geert Uytterhoeven Signed-off-by: D Scott Phillips Link: https://lore.kernel.org/r/20221102160106.1096948-1-scott@os.amperecomputing.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/include/asm/cputype.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 457b6bb276bb..9cf5d9551e99 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -41,7 +41,7 @@ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) #define MIDR_CPU_MODEL(imp, partnum) \ - (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ + ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) From a4b5423f88a17a36550ae8c16c46779b1ee42f4b Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 4 Nov 2022 10:13:34 +0800 Subject: [PATCH 0829/1477] siox: fix possible memory leak in siox_device_add() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6e63153db50059fb78b8a8447b132664887d24e3 ] If device_register() returns error in siox_device_add(), the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(), and sdevice is freed in siox_device_release(), set it to null in error path. 
Fixes: bbecb07fa0af ("siox: new driver framework for eckelmann SIOX") Signed-off-by: Yang Yingliang Reviewed-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221104021334.618189-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/siox/siox-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/siox/siox-core.c b/drivers/siox/siox-core.c index f8c08fb9891d..e0ffef6e9386 100644 --- a/drivers/siox/siox-core.c +++ b/drivers/siox/siox-core.c @@ -835,6 +835,8 @@ static struct siox_device *siox_device_add(struct siox_master *smaster, err_device_register: /* don't care to make the buffer smaller again */ + put_device(&sdevice->dev); + sdevice = NULL; err_buf_alloc: siox_master_unlock(smaster); From bce3e6fe8ba7cc42d0111281f135204ce16e0d94 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 23 Sep 2022 19:52:08 +0100 Subject: [PATCH 0830/1477] parport_pc: Avoid FIFO port location truncation [ Upstream commit ab126f51c93a15093df604f661c9480854c005a3 ] Match the data type of a temporary holding a reference to the FIFO port with the type of the original reference coming from `struct parport', avoiding data truncation with LP64 ports such as SPARC64 that refer to PCI port I/O locations via their corresponding MMIO addresses and will therefore have non-zero bits in the high 32-bit part of the reference. And in any case it is cleaner to have the data types matching here. Signed-off-by: Maciej W. 
Rozycki Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/linux-pci/20220419033752.GA1101844@bhelgaas/ Acked-by: Sudip Mukherjee Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209231912550.29493@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/parport/parport_pc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index eda4ded4d5e5..925be41eeebe 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -468,7 +468,7 @@ static size_t parport_pc_fifo_write_block_pio(struct parport *port, const unsigned char *bufp = buf; size_t left = length; unsigned long expire = jiffies + port->physport->cad->timeout; - const int fifo = FIFO(port); + const unsigned long fifo = FIFO(port); int poll_for = 8; /* 80 usecs */ const struct parport_pc_private *priv = port->physport->private_data; const int fifo_depth = priv->fifo_depth; From 777430aa4ddccaa5accec6db90ffc1d47f00d471 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 10 Nov 2022 16:20:56 +0800 Subject: [PATCH 0831/1477] pinctrl: devicetree: fix null pointer dereferencing in pinctrl_dt_to_map [ Upstream commit 91d5c5060ee24fe8da88cd585bb43b843d2f0dce ] Here is the BUG report by KASAN about null pointer dereference: BUG: KASAN: null-ptr-deref in strcmp+0x2e/0x50 Read of size 1 at addr 0000000000000000 by task python3/2640 Call Trace: strcmp __of_find_property of_find_property pinctrl_dt_to_map kasprintf() would return NULL pointer when kmalloc() fail to allocate. So directly return ENOMEM, if kasprintf() return NULL pointer. 
Fixes: 57291ce295c0 ("pinctrl: core device tree mapping table parsing support") Signed-off-by: Zeng Heng Link: https://lore.kernel.org/r/20221110082056.2014898-1-zengheng4@huawei.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/devicetree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index 3fb238714718..eac55fee5281 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -220,6 +220,8 @@ int pinctrl_dt_to_map(struct pinctrl *p, struct pinctrl_dev *pctldev) for (state = 0; ; state++) { /* Retrieve the pinctrl-* property */ propname = kasprintf(GFP_KERNEL, "pinctrl-%d", state); + if (!propname) + return -ENOMEM; prop = of_find_property(np, propname, &size); kfree(propname); if (!prop) { From c776a49d099cfc4fd9ccd49613ac03d4508d918d Mon Sep 17 00:00:00 2001 From: Aishwarya Kothari Date: Wed, 31 Aug 2022 16:16:22 +0200 Subject: [PATCH 0832/1477] drm/panel: simple: set bpc field for logic technologies displays [ Upstream commit 876153ab068b2507a19aa3ef481f5b00a2cc780f ] In case bpc is not set for a panel it then throws a WARN(). Add bpc to the panels logictechno_lt170410_2whc and logictechno_lt161010_2nh. 
Fixes: 5728fe7fa539 ("drm/panel: simple: add display timings for logic technologies displays") Signed-off-by: Aishwarya Kothari Signed-off-by: Francesco Dolcini Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20220831141622.39605-1-francesco.dolcini@toradex.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-simple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index b7b37082a9d7..1a87cc445b5e 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2655,6 +2655,7 @@ static const struct display_timing logictechno_lt161010_2nh_timing = { static const struct panel_desc logictechno_lt161010_2nh = { .timings = &logictechno_lt161010_2nh_timing, .num_timings = 1, + .bpc = 6, .size = { .width = 154, .height = 86, @@ -2684,6 +2685,7 @@ static const struct display_timing logictechno_lt170410_2whc_timing = { static const struct panel_desc logictechno_lt170410_2whc = { .timings = &logictechno_lt170410_2whc_timing, .num_timings = 1, + .bpc = 8, .size = { .width = 217, .height = 136, From c47a823ea186263ab69cfb665327b7f72cb5e779 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Tue, 1 Nov 2022 15:07:15 +0800 Subject: [PATCH 0833/1477] drm/drv: Fix potential memory leak in drm_dev_init() [ Upstream commit ff963634f7b2e0dc011349abb3fb81a0d074f443 ] drm_dev_init() will add drm_dev_init_release() as a callback. When drmm_add_action() failed, the release function won't be added. As the result, the ref cnt added by device_get() in drm_dev_init() won't be put by drm_dev_init_release(), which leads to the memleak. Use drmm_add_action_or_reset() instead of drmm_add_action() to prevent memleak. unreferenced object 0xffff88810bc0c800 (size 2048): comm "modprobe", pid 8322, jiffies 4305809845 (age 15.292s) hex dump (first 32 bytes): e8 cc c0 0b 81 88 ff ff ff ff ff ff 00 00 00 00 ................ 
20 24 3c 0c 81 88 ff ff 18 c8 c0 0b 81 88 ff ff $<............. backtrace: [<000000007251f72d>] __kmalloc+0x4b/0x1c0 [<0000000045f21f26>] platform_device_alloc+0x2d/0xe0 [<000000004452a479>] platform_device_register_full+0x24/0x1c0 [<0000000089f4ea61>] 0xffffffffa0736051 [<00000000235b2441>] do_one_initcall+0x7a/0x380 [<0000000001a4a177>] do_init_module+0x5c/0x230 [<000000002bf8a8e2>] load_module+0x227d/0x2420 [<00000000637d6d0a>] __do_sys_finit_module+0xd5/0x140 [<00000000c99fc324>] do_syscall_64+0x3f/0x90 [<000000004d85aa77>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 2cbf7fc6718b ("drm: Use drmm_ for drm_dev_init cleanup") Signed-off-by: Shang XiaoJing Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20221101070716.9189-2-shangxiaojing@huawei.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 006e3b896cae..4ca995ce19af 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -610,7 +610,7 @@ static int drm_dev_init(struct drm_device *dev, mutex_init(&dev->clientlist_mutex); mutex_init(&dev->master_mutex); - ret = drmm_add_action(dev, drm_dev_init_release, NULL); + ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL); if (ret) return ret; From 1d160dfb3fdf11ba9447e862c548447f91f4e74a Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Tue, 1 Nov 2022 15:07:16 +0800 Subject: [PATCH 0834/1477] drm: Fix potential null-ptr-deref in drm_vblank_destroy_worker() [ Upstream commit 4979524f5a2a8210e87fde2f642b0dc060860821 ] drm_vblank_init() calls drmm_add_action_or_reset() with drm_vblank_init_release() as action. If __drmm_add_action() fails, it will directly call drm_vblank_init_release() with the vblank whose worker is NULL. As a result, a null-ptr-deref will happen in kthread_destroy_worker().
Add the NULL check before calling drm_vblank_destroy_worker(). BUG: null-ptr-deref KASAN: null-ptr-deref in range [0x0000000000000068-0x000000000000006f] CPU: 5 PID: 961 Comm: modprobe Not tainted 6.0.0-11331-gd465bff130bf-dirty RIP: 0010:kthread_destroy_worker+0x25/0xb0 Call Trace: drm_vblank_init_release+0x124/0x220 [drm] ? drm_crtc_vblank_restore+0x8b0/0x8b0 [drm] __drmm_add_action_or_reset+0x41/0x50 [drm] drm_vblank_init+0x282/0x310 [drm] vkms_init+0x35f/0x1000 [vkms] ? 0xffffffffc4508000 ? lock_is_held_type+0xd7/0x130 ? __kmem_cache_alloc_node+0x1c2/0x2b0 ? lock_is_held_type+0xd7/0x130 ? 0xffffffffc4508000 do_one_initcall+0xd0/0x4f0 ... do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 5e6c2b4f9161 ("drm/vblank: Add vblank works") Signed-off-by: Shang XiaoJing Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20221101070716.9189-3-shangxiaojing@huawei.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index f80e0f28087d..41efe40bc70f 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -116,7 +116,8 @@ static inline void drm_vblank_flush_worker(struct drm_vblank_crtc *vblank) static inline void drm_vblank_destroy_worker(struct drm_vblank_crtc *vblank) { - kthread_destroy_worker(vblank->worker); + if (vblank->worker) + kthread_destroy_worker(vblank->worker); } int drm_vblank_worker_init(struct drm_vblank_crtc *vblank); From f68a9efd7895e2f951523323628377bcdd97d068 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 2 Nov 2022 20:19:45 +0100 Subject: [PATCH 0835/1477] ARM: dts: imx7: Fix NAND controller size-cells [ Upstream commit 753395ea1e45c724150070b5785900b6a44bd5fb ] The NAND controller size-cells should be 0 per DT bindings. 
Fix the following warning produced by DT bindings check: " nand-controller@33002000: #size-cells:0:0: 0 was expected nand-controller@33002000: Unevaluated properties are not allowed ('#address-cells', '#size-cells' were unexpected) " Fix the missing space in node name too. Fixes: e7495a45a76de ("ARM: dts: imx7: add GPMI NAND and APBH DMA") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx7s.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 9e1b0af0aa43..e4ff47110a96 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1221,10 +1221,10 @@ clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>; }; - gpmi: nand-controller@33002000{ + gpmi: nand-controller@33002000 { compatible = "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = ; From 30ece7dbeeca6b64c61441abbc2b99e356cced5a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 2 Nov 2022 20:19:46 +0100 Subject: [PATCH 0836/1477] arm64: dts: imx8mm: Fix NAND controller size-cells [ Upstream commit 1610233bc2c2cae2dff9e101e6ea5ef69cceb0e9 ] The NAND controller size-cells should be 0 per DT bindings. Fix the following warning produced by DT bindings check: " nand-controller@33002000: #size-cells:0:0: 0 was expected nand-controller@33002000: Unevaluated properties are not allowed ('#address-cells', '#size-cells' were unexpected) " Fix the missing space in node name too.
Fixes: a05ea40eb384e ("arm64: dts: imx: Add i.mx8mm dtsi support") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mm.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index f4d7bb75707d..3490619a9ba9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -939,10 +939,10 @@ clocks = <&clk IMX8MM_CLK_NAND_USDHC_BUS_RAWNAND_CLK>; }; - gpmi: nand-controller@33002000{ + gpmi: nand-controller@33002000 { compatible = "fsl,imx8mm-gpmi-nand", "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = ; From ac4f404c250b3e3c0d350da6f7f834f371c5e9c1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 2 Nov 2022 20:19:47 +0100 Subject: [PATCH 0837/1477] arm64: dts: imx8mn: Fix NAND controller size-cells [ Upstream commit 5468e93b5b1083eaa729f98e59da18c85d9c4126 ] The NAND controller size-cells should be 0 per DT bindings. 
Fix the following warning produced by DT bindings check: " nand-controller@33002000: #size-cells:0:0: 0 was expected nand-controller@33002000: Unevaluated properties are not allowed ('#address-cells', '#size-cells' were unexpected) " Fixes: 6c3debcbae47a ("arm64: dts: freescale: Add i.MX8MN dtsi support") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index aea723eb2ba3..7dba83041264 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -809,7 +809,7 @@ gpmi: nand-controller@33002000 { compatible = "fsl,imx8mn-gpmi-nand", "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = ; From ac471468f7c16cda2525909946ca13ddbcd14000 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 8 Nov 2022 21:40:01 +0800 Subject: [PATCH 0838/1477] ata: libata-transport: fix double ata_host_put() in ata_tport_add() [ Upstream commit 8c76310740807ade5ecdab5888f70ecb6d35732e ] In the error path in ata_tport_add(), when calling put_device(), ata_tport_release() is called, it will put the refcount of 'ap->host'. And then ata_host_put() is called again, the refcount is decreased to 0, ata_host_release() is called, all ports are freed and set to null. When unbinding the device after failure, ata_host_stop() is called to release the resources, which leads to a null-ptr-deref, because all the ports are freed and null.
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 CPU: 7 PID: 18671 Comm: modprobe Kdump: loaded Tainted: G E 6.1.0-rc3+ #8 pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : ata_host_stop+0x3c/0x84 [libata] lr : release_nodes+0x64/0xd0 Call trace: ata_host_stop+0x3c/0x84 [libata] release_nodes+0x64/0xd0 devres_release_all+0xbc/0x1b0 device_unbind_cleanup+0x20/0x70 really_probe+0x158/0x320 __driver_probe_device+0x84/0x120 driver_probe_device+0x44/0x120 __driver_attach+0xb4/0x220 bus_for_each_dev+0x78/0xdc driver_attach+0x2c/0x40 bus_add_driver+0x184/0x240 driver_register+0x80/0x13c __pci_register_driver+0x4c/0x60 ahci_pci_driver_init+0x30/0x1000 [ahci] Fix this by removing redundant ata_host_put() in the error path. Fixes: 2623c7a5f279 ("libata: add refcounting to ata_host") Signed-off-by: Yang Yingliang Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/libata-transport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index b33772df9bc6..8a9850bd5d6c 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -317,7 +317,6 @@ int ata_tport_add(struct device *parent, tport_err: transport_destroy_device(dev); put_device(dev); - ata_host_put(ap->host); return error; } From b5362dc1634d8b8d5f30920f33ac11a3276b7ed9 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 8 Nov 2022 21:40:02 +0800 Subject: [PATCH 0839/1477] ata: libata-transport: fix error handling in ata_tport_add() [ Upstream commit 3613dbe3909dcc637fe6be00e4dc43b4aa0470ee ] In ata_tport_add(), the return value of transport_add_device() is not checked. As a result, it causes null-ptr-deref while removing the module, because transport_remove_device() is called to remove the device that was not added. 
Unable to handle kernel NULL pointer dereference at virtual address 00000000000000d0 CPU: 12 PID: 13605 Comm: rmmod Kdump: loaded Tainted: G W 6.1.0-rc3+ #8 pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : device_del+0x48/0x39c lr : device_del+0x44/0x39c Call trace: device_del+0x48/0x39c attribute_container_class_device_del+0x28/0x40 transport_remove_classdev+0x60/0x7c attribute_container_device_trigger+0x118/0x120 transport_remove_device+0x20/0x30 ata_tport_delete+0x34/0x60 [libata] ata_port_detach+0x148/0x1b0 [libata] ata_pci_remove_one+0x50/0x80 [libata] ahci_remove_one+0x4c/0x8c [ahci] Fix this by checking and handling return value of transport_add_device() in ata_tport_add(). Fixes: d9027470b886 ("[libata] Add ATA transport class") Signed-off-by: Yang Yingliang Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/libata-transport.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 8a9850bd5d6c..da1b144d8288 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -301,7 +301,9 @@ int ata_tport_add(struct device *parent, pm_runtime_enable(dev); pm_runtime_forbid(dev); - transport_add_device(dev); + error = transport_add_device(dev); + if (error) + goto tport_transport_add_err; transport_configure_device(dev); error = ata_tlink_add(&ap->link); @@ -312,6 +314,7 @@ int ata_tport_add(struct device *parent, tport_link_err: transport_remove_device(dev); + tport_transport_add_err: device_del(dev); tport_err: From 7377a14598f6b04446c54bc4a50cd249470d6c6f Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 8 Nov 2022 21:40:03 +0800 Subject: [PATCH 0840/1477] ata: libata-transport: fix error handling in ata_tlink_add() [ Upstream commit cf0816f6322c5c37ee52655f928e91ecf32da103 ] In ata_tlink_add(), the return value of transport_add_device() is not checked. 
As a result, it causes null-ptr-deref while removing the module, because transport_remove_device() is called to remove the device that was not added. Unable to handle kernel NULL pointer dereference at virtual address 00000000000000d0 CPU: 33 PID: 13850 Comm: rmmod Kdump: loaded Tainted: G W 6.1.0-rc3+ #12 pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : device_del+0x48/0x39c lr : device_del+0x44/0x39c Call trace: device_del+0x48/0x39c attribute_container_class_device_del+0x28/0x40 transport_remove_classdev+0x60/0x7c attribute_container_device_trigger+0x118/0x120 transport_remove_device+0x20/0x30 ata_tlink_delete+0x88/0xb0 [libata] ata_tport_delete+0x2c/0x60 [libata] ata_port_detach+0x148/0x1b0 [libata] ata_pci_remove_one+0x50/0x80 [libata] ahci_remove_one+0x4c/0x8c [ahci] Fix this by checking and handling return value of transport_add_device() in ata_tlink_add(). Fixes: d9027470b886 ("[libata] Add ATA transport class") Signed-off-by: Yang Yingliang Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/libata-transport.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index da1b144d8288..e386e5f35015 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -428,7 +428,9 @@ int ata_tlink_add(struct ata_link *link) goto tlink_err; } - transport_add_device(dev); + error = transport_add_device(dev); + if (error) + goto tlink_transport_err; transport_configure_device(dev); ata_for_each_dev(ata_dev, link, ALL) { @@ -443,6 +445,7 @@ int ata_tlink_add(struct ata_link *link) ata_tdev_delete(ata_dev); } transport_remove_device(dev); + tlink_transport_err: device_del(dev); tlink_err: transport_destroy_device(dev); From ef2ac07ab83163b9a53f45da20e14302591ad9cc Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 8 Nov 2022 21:40:04 +0800 Subject: [PATCH 0841/1477] ata: libata-transport: fix error handling in ata_tdev_add() 
[ Upstream commit 1ff36351309e3eadcff297480baf4785e726de9b ] In ata_tdev_add(), the return value of transport_add_device() is not checked. As a result, it causes null-ptr-deref while removing the module, because transport_remove_device() is called to remove the device that was not added. Unable to handle kernel NULL pointer dereference at virtual address 00000000000000d0 CPU: 13 PID: 13603 Comm: rmmod Kdump: loaded Tainted: G W 6.1.0-rc3+ #36 pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : device_del+0x48/0x3a0 lr : device_del+0x44/0x3a0 Call trace: device_del+0x48/0x3a0 attribute_container_class_device_del+0x28/0x40 transport_remove_classdev+0x60/0x7c attribute_container_device_trigger+0x118/0x120 transport_remove_device+0x20/0x30 ata_tdev_delete+0x24/0x50 [libata] ata_tlink_delete+0x40/0xa0 [libata] ata_tport_delete+0x2c/0x60 [libata] ata_port_detach+0x148/0x1b0 [libata] ata_pci_remove_one+0x50/0x80 [libata] ahci_remove_one+0x4c/0x8c [ahci] Fix this by checking and handling return value of transport_add_device() in ata_tdev_add(). In the error path, device_del() is called to delete the device which was added earlier in this function, and ata_tdev_free() is called to free ata_dev. 
Fixes: d9027470b886 ("[libata] Add ATA transport class") Signed-off-by: Yang Yingliang Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/libata-transport.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index e386e5f35015..31a66fc0c31d 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -683,7 +683,13 @@ static int ata_tdev_add(struct ata_device *ata_dev) return error; } - transport_add_device(dev); + error = transport_add_device(dev); + if (error) { + device_del(dev); + ata_tdev_free(ata_dev); + return error; + } + transport_configure_device(dev); return 0; } From 1f6a73b25dabdd5e1bb4ca67bc0ee544de8fefd3 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Thu, 10 Nov 2022 07:21:28 -0500 Subject: [PATCH 0842/1477] bpf: Initialize same number of free nodes for each pcpu_freelist [ Upstream commit 4b45cd81f737d79d0fbfc0d320a1e518e7f0bbf0 ] pcpu_freelist_populate() initializes nr_elems / num_possible_cpus() + 1 free nodes for some CPUs, and then possibly one CPU with fewer nodes, followed by remaining cpus with 0 nodes. For example, when nr_elems == 256 and num_possible_cpus() == 32, CPU 0~27 each gets 9 free nodes, CPU 28 gets 4 free nodes, CPU 29~31 get 0 free nodes, while in fact each CPU should get 8 nodes equally. This patch initializes nr_elems / num_possible_cpus() free nodes for each CPU firstly, then allocates the remaining free nodes by one for each CPU until no free nodes left. 
Fixes: e19494edab82 ("bpf: introduce percpu_freelist") Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221110122128.105214-1-xukuohai@huawei.com Signed-off-by: Sasha Levin --- kernel/bpf/percpu_freelist.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 3d897de89061..bbab8bb4b2fd 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -102,22 +102,21 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, u32 nr_elems) { struct pcpu_freelist_head *head; - int i, cpu, pcpu_entries; + unsigned int cpu, cpu_idx, i, j, n, m; - pcpu_entries = nr_elems / num_possible_cpus() + 1; - i = 0; + n = nr_elems / num_possible_cpus(); + m = nr_elems % num_possible_cpus(); + cpu_idx = 0; for_each_possible_cpu(cpu) { -again: head = per_cpu_ptr(s->freelist, cpu); - /* No locking required as this is not visible yet. */ - pcpu_freelist_push_node(head, buf); - i++; - buf += elem_size; - if (i == nr_elems) - break; - if (i % pcpu_entries) - goto again; + j = n + (cpu_idx < m ? 1 : 0); + for (i = 0; i < j; i++) { + /* No locking required as this is not visible yet. */ + pcpu_freelist_push_node(head, buf); + buf += elem_size; + } + cpu_idx++; } } From 6b23993d5bef1959926099b060c3e803e1caec6b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 9 Nov 2022 15:01:36 +0000 Subject: [PATCH 0843/1477] net: bgmac: Drop free_netdev() from bgmac_enet_remove() [ Upstream commit 6f928ab8ee9bfbcb0e631c47ea8a16c3d5116ff1 ] netdev is allocated in bgmac_alloc() with devm_alloc_etherdev() and will be auto released in ->remove and ->probe failure path. Using free_netdev() in bgmac_enet_remove() leads to double free. 
Fixes: 34a5102c3235 ("net: bgmac: allocate struct bgmac just once & don't copy it") Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20221109150136.2991171-1-weiyongjun@huaweicloud.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bgmac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 6290d8bedc92..9960127f612e 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1568,7 +1568,6 @@ void bgmac_enet_remove(struct bgmac *bgmac) phy_disconnect(bgmac->net_dev->phydev); netif_napi_del(&bgmac->napi); bgmac_dma_free(bgmac); - free_netdev(bgmac->net_dev); } EXPORT_SYMBOL_GPL(bgmac_enet_remove); From 083a2c9ef82e184bdf0b9f9a1e5fc38d32afbb47 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 9 Nov 2022 21:28:32 +0800 Subject: [PATCH 0844/1477] mISDN: fix possible memory leak in mISDN_dsp_element_register() [ Upstream commit 98a2ac1ca8fd6eca6867726fe238d06e75eb1acd ] After commit 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array"), the name of the device is allocated dynamically, so use put_device() to give up the reference, so that the name can be freed in kobject_cleanup() when the refcount is 0. The 'entry' is going to be freed in mISDN_dsp_dev_release(), so the kfree() is removed. list_del() is called in mISDN_dsp_dev_release(), so the list needs to be initialized.
Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221109132832.3270119-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/isdn/mISDN/dsp_pipeline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c index c3b2c99b5cd5..cfbcd9e973c2 100644 --- a/drivers/isdn/mISDN/dsp_pipeline.c +++ b/drivers/isdn/mISDN/dsp_pipeline.c @@ -77,6 +77,7 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem) if (!entry) return -ENOMEM; + INIT_LIST_HEAD(&entry->list); entry->elem = elem; entry->dev.class = elements_class; @@ -107,7 +108,7 @@ err2: device_unregister(&entry->dev); return ret; err1: - kfree(entry); + put_device(&entry->dev); return ret; } EXPORT_SYMBOL(mISDN_dsp_element_register); From 4cba73f2d6fcda4d57e71f7966af5ac222cbad0d Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 10 Nov 2022 02:16:42 +0000 Subject: [PATCH 0845/1477] net: hinic: Fix error handling in hinic_module_init() [ Upstream commit 8eab9be56cc6b702a445d2b6d0256aa0992316b3 ] A problem about hinic create debugfs failed is triggered with the following log given: [ 931.419023] debugfs: Directory 'hinic' with parent '/' already present! The reason is that hinic_module_init() returns pci_register_driver() directly without checking its return value, if pci_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of hinic can never be created later. hinic_module_init() hinic_dbg_register_debugfs() # create debugfs directory pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when pci_register_driver() returns error. 
Fixes: 253ac3a97921 ("hinic: add support to query sq info") Signed-off-by: Yuan Can Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221110021642.80378-1-yuancan@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/huawei/hinic/hinic_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 4f1d585485d7..6ec042d48cd1 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -1502,8 +1502,15 @@ static struct pci_driver hinic_driver = { static int __init hinic_module_init(void) { + int ret; + hinic_dbg_register_debugfs(HINIC_DRV_NAME); - return pci_register_driver(&hinic_driver); + + ret = pci_register_driver(&hinic_driver); + if (ret) + hinic_dbg_unregister_debugfs(); + + return ret; } static void __exit hinic_module_exit(void) From 417f2d2edf30a443189b8c6c820da991133f27cf Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 10 Nov 2022 18:30:37 +0800 Subject: [PATCH 0846/1477] net: liquidio: release resources when liquidio driver open failed [ Upstream commit 8979f428a4afc215e390006e5ea19fd4e22c7ca9 ] When liquidio driver open failed, it doesn't release resources. Compile tested only. Fixes: 5b07aee11227 ("liquidio: MSIX support for CN23XX") Fixes: dbc97bfd3918 ("net: liquidio: Add missing null pointer checks") Signed-off-by: Zhengchao Shao Reviewed-by: Leon Romanovsky Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- .../net/ethernet/cavium/liquidio/lio_main.c | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index e0d18e917108..c4dc6e2ccd6b 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1798,13 +1798,10 @@ static int liquidio_open(struct net_device *netdev) ifstate_set(lio, LIO_IFSTATE_RUNNING); - if (OCTEON_CN23XX_PF(oct)) { - if (!oct->msix_on) - if (setup_tx_poll_fn(netdev)) - return -1; - } else { - if (setup_tx_poll_fn(netdev)) - return -1; + if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) { + ret = setup_tx_poll_fn(netdev); + if (ret) + goto err_poll; } netif_tx_start_all_queues(netdev); @@ -1817,7 +1814,7 @@ static int liquidio_open(struct net_device *netdev) /* tell Octeon to start forwarding packets to host */ ret = send_rx_ctrl_cmd(lio, 1); if (ret) - return ret; + goto err_rx_ctrl; /* start periodical statistics fetch */ INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats); @@ -1828,6 +1825,27 @@ static int liquidio_open(struct net_device *netdev) dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name); + return 0; + +err_rx_ctrl: + if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) + cleanup_tx_poll_fn(netdev); +err_poll: + if (lio->ptp_clock) { + ptp_clock_unregister(lio->ptp_clock); + lio->ptp_clock = NULL; + } + + if (oct->props[lio->ifidx].napi_enabled == 1) { + list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) + napi_disable(napi); + + oct->props[lio->ifidx].napi_enabled = 0; + + if (OCTEON_CN23XX_PF(oct)) + oct->droq[0]->ops.poll_mode = 0; + } + return ret; } From 709aa1f73d3e9e9ea16e2c4e44f2874c5d2c382c Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Thu, 10 Nov 2022 19:38:23 +0800 Subject: [PATCH 0847/1477] mISDN: fix misuse of put_device() 
in mISDN_register_device() [ Upstream commit 2d25107e111a85c56f601a5470f1780ec054e6ac ] We should not release reference by put_device() before calling device_initialize(). Fixes: e7d1d4d9ac0d ("mISDN: fix possible memory leak in mISDN_register_device()") Signed-off-by: Wang ShaoBo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/isdn/mISDN/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index 7ea0100f218a..90ee56d07a6e 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -222,7 +222,7 @@ mISDN_register_device(struct mISDNdevice *dev, err = get_free_devid(); if (err < 0) - goto error1; + return err; dev->id = err; device_initialize(&dev->dev); From fb5ee1560babc51b3adb4a6239e79479838d71c6 Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Fri, 11 Nov 2022 09:41:30 +0800 Subject: [PATCH 0848/1477] net: macvlan: Use built-in RCU list checking [ Upstream commit 5df1341ea822292275c56744aab9c536d75c33be ] hlist_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to hlist_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled. Execute as follow: ip link add link eth0 type macvlan mode source macaddr add The rtnl_lock is held when macvlan_hash_lookup_source() or macvlan_fill_info_macaddr() are called in the non-RCU read side section. So, pass lockdep_rtnl_is_held() to silence false lockdep warning. Fixes: 79cf79abce71 ("macvlan: add source mode") Signed-off-by: Chuang Wang Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/macvlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 6b269a72388b..5d6b4f76b519 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -139,7 +139,7 @@ static struct macvlan_source_entry *macvlan_hash_lookup_source( u32 idx = macvlan_eth_hash(addr); struct hlist_head *h = &vlan->port->vlan_source_hash[idx]; - hlist_for_each_entry_rcu(entry, h, hlist) { + hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) { if (ether_addr_equal_64bits(entry->addr, addr) && entry->vlan == vlan) return entry; @@ -1614,7 +1614,7 @@ static int macvlan_fill_info_macaddr(struct sk_buff *skb, struct hlist_head *h = &vlan->port->vlan_source_hash[i]; struct macvlan_source_entry *entry; - hlist_for_each_entry_rcu(entry, h, hlist) { + hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) { if (entry->vlan != vlan) continue; if (nla_put(skb, IFLA_MACVLAN_MACADDR, ETH_ALEN, entry->addr)) From 389738f5dbc51f36ce735ee327f6d143b735a95f Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 11 Nov 2022 09:47:34 +0800 Subject: [PATCH 0849/1477] net: caif: fix double disconnect client in chnl_net_open() [ Upstream commit 8fbb53c8bfd8c56ecf1f78dc821778b58f505503 ] When connecting to client timeout, disconnect client for twice in chnl_net_open(). Remove one. Compile tested only. Fixes: 2aa40aef9deb ("caif: Use link layer MTU instead of fixed MTU") Signed-off-by: Zhengchao Shao Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/caif/chnl_net.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 42dc080a4dbb..806fb4d84fd3 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -315,9 +315,6 @@ static int chnl_net_open(struct net_device *dev) if (result == 0) { pr_debug("connect timeout\n"); - caif_disconnect_client(dev_net(dev), &priv->chnl); - priv->state = CAIF_DISCONNECTED; - pr_debug("state disconnected\n"); result = -ETIMEDOUT; goto error; } From d6a561bd4c53c5fc8cade48a555d3dc6acfb2c5b Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Fri, 11 Nov 2022 15:04:33 +0800 Subject: [PATCH 0850/1477] bnxt_en: Remove debugfs when pci_register_driver failed [ Upstream commit 991aef4ee4f6eb999924f429b943441a32835c8f ] When pci_register_driver() fails, we need to remove the debugfs directory; otherwise it causes a resource leak. Fix it. The resource leak shows up in the log as follows: [ 52.184456] debugfs: Directory 'bnxt_en' with parent '/' already present! Fixes: cabfb09d87bd ("bnxt_en: add debugfs support for DIM") Signed-off-by: Gaosheng Cui Reviewed-by: Leon Romanovsky Reviewed-by: Michael Chan Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8311473d537b..92f54e333395 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13111,8 +13111,16 @@ static struct pci_driver bnxt_pci_driver = { static int __init bnxt_init(void) { + int err; + bnxt_debug_init(); - return pci_register_driver(&bnxt_pci_driver); + err = pci_register_driver(&bnxt_pci_driver); + if (err) { + bnxt_debug_exit(); + return err; + } + + return 0; } static void __exit bnxt_exit(void) From bb9924a6edd9d4a9ef83a5f337af60f8a7a68f98 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 10 Nov 2022 23:24:41 +0800 Subject: [PATCH 0851/1477] xen/pcpu: fix possible memory leak in register_pcpu() [ Upstream commit da36a2a76b01b210ffaa55cdc2c99bc8783697c5 ] In device_add(), dev_set_name() is called to allocate name, if it returns error, the name need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(). 
Fixes: f65c9bb3fb72 ("xen/pcpu: Xen physical cpus online/offline sys interface") Signed-off-by: Yang Yingliang Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20221110152441.401630-1-yangyingliang@huawei.com Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/pcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index cdc6daa7a9f6..9cf7085a260b 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -228,7 +228,7 @@ static int register_pcpu(struct pcpu *pcpu) err = device_register(dev); if (err) { - pcpu_release(dev); + put_device(dev); return err; } From 2d5a495501352f3df1818c798b0e23bfe4d6e859 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Sun, 13 Nov 2022 09:29:29 +0000 Subject: [PATCH 0852/1477] net: ionic: Fix error handling in ionic_init_module() [ Upstream commit 280c0f7cd0aa4d190619b18243110e052a90775c ] A problem about ionic create debugfs failed is triggered with the following log given: [ 415.799514] debugfs: Directory 'ionic' with parent '/' already present! The reason is that ionic_init_module() returns ionic_bus_register_driver() directly without checking its return value, if ionic_bus_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of ionic can never be created later. ionic_init_module() ionic_debugfs_create() # create debugfs directory ionic_bus_register_driver() pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when ionic_bus_register_driver() returns error. 
Fixes: fbfb8031533c ("ionic: Add hardware init and device commands") Signed-off-by: Yuan Can Acked-by: Shannon Nelson Link: https://lore.kernel.org/r/20221113092929.19161-1-yuancan@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/pensando/ionic/ionic_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index f60ffef33e0c..00b6985edea0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -569,8 +569,14 @@ int ionic_port_reset(struct ionic *ionic) static int __init ionic_init_module(void) { + int ret; + ionic_debugfs_create(); - return ionic_bus_register_driver(); + ret = ionic_bus_register_driver(); + if (ret) + ionic_debugfs_destroy(); + + return ret; } static void __exit ionic_cleanup_module(void) From 6b23a4b252044e4fd23438930d452244818d7000 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 14 Nov 2022 02:56:59 +0000 Subject: [PATCH 0853/1477] net: ena: Fix error handling in ena_init() [ Upstream commit d349e9be5a2c2d7588a2c4e4bfa0bb3dc1226769 ] The ena_init() won't destroy workqueue created by create_singlethread_workqueue() when pci_register_driver() failed. Call destroy_workqueue() when pci_register_driver() failed to prevent the resource leak. 
Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Yuan Can Acked-by: Shay Agroskin Link: https://lore.kernel.org/r/20221114025659.124726-1-yuancan@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 52414ac2c901..1722d4091ea3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -4488,13 +4488,19 @@ static struct pci_driver ena_pci_driver = { static int __init ena_init(void) { + int ret; + ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME); if (!ena_wq) { pr_err("Failed to create workqueue\n"); return -ENOMEM; } - return pci_register_driver(&ena_pci_driver); + ret = pci_register_driver(&ena_pci_driver); + if (ret) + destroy_workqueue(ena_wq); + + return ret; } static void __exit ena_cleanup(void) From 9ed51414aef6e59e832e2960f10766dce2d5b1a1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Nov 2022 16:16:43 +0300 Subject: [PATCH 0854/1477] drbd: use after free in drbd_create_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a7a1598189228b5007369a9622ccdf587be0730f ] The drbd_destroy_connection() frees the "connection" so use the _safe() iterator to prevent a use after free. 
Fixes: b6f85ef9538b ("drbd: Iterate over all connections") Signed-off-by: Dan Carpenter Reviewed-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/Y3Jd5iZRbNQ9w6gm@kili Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 407527ff6b1f..51450f7c81af 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2720,7 +2720,7 @@ static int init_submitter(struct drbd_device *device) enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor) { struct drbd_resource *resource = adm_ctx->resource; - struct drbd_connection *connection; + struct drbd_connection *connection, *n; struct drbd_device *device; struct drbd_peer_device *peer_device, *tmp_peer_device; struct gendisk *disk; @@ -2839,7 +2839,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig out_idr_remove_vol: idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: - for_each_connection(connection, resource) { + for_each_connection_safe(connection, n, resource) { peer_device = idr_remove(&connection->peer_devices, vnr); if (peer_device) kref_put(&connection->kref, drbd_destroy_connection); From c24013273ed4a09b1e99720f2a7c8d36dfeb6c2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= Date: Thu, 10 Nov 2022 17:31:44 +0100 Subject: [PATCH 0855/1477] platform/x86/intel: pmc: Don't unconditionally attach Intel PMC when virtualized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2dbfb3f33350e1e868d3d7ed4c176d8777150878 ] The current logic in the Intel PMC driver will forcefully attach it when detecting any CPU on the intel_pmc_core_platform_ids array, even if the matching ACPI device is not present. 
There's no checking in pmc_core_probe() to assert that the PMC device is present, and hence on virtualized environments the PMC device probes successfully, even if the underlying registers are not present. Before commit 21ae43570940 ("platform/x86: intel_pmc_core: Substitute PCI with CPUID enumeration") the driver would check for the presence of a specific PCI device, and that prevented the driver from attaching when running virtualized. Fix by only forcefully attaching the PMC device when not running virtualized. Note that virtualized platforms can still get the device to load if the appropriate ACPI device is present on the tables provided to the VM. Make an exception for the Xen initial domain, which does have full hardware access, and hence can attach to the PMC if present. Fixes: 21ae43570940 ("platform/x86: intel_pmc_core: Substitute PCI with CPUID enumeration") Signed-off-by: Roger Pau Monné Acked-by: David E. Box Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221110163145.80374-1-roger.pau@citrix.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/intel_pmc_core_pltdrv.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/intel_pmc_core_pltdrv.c b/drivers/platform/x86/intel_pmc_core_pltdrv.c index 15ca8afdd973..ddfba38c2104 100644 --- a/drivers/platform/x86/intel_pmc_core_pltdrv.c +++ b/drivers/platform/x86/intel_pmc_core_pltdrv.c @@ -18,6 +18,8 @@ #include <asm/cpu_device_id.h> #include <asm/intel-family.h> +#include <xen/xen.h> + static void intel_pmc_core_release(struct device *dev) { kfree(dev); @@ -53,6 +55,13 @@ static int __init pmc_core_platform_init(void) if (acpi_dev_present("INT33A1", NULL, -1)) return -ENODEV; + /* + * Skip forcefully attaching the device for VMs. Make an exception for + * Xen dom0, which does have full hardware access.
+ */ + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR) && !xen_initial_domain()) + return -ENODEV; + if (!x86_match_cpu(intel_pmc_core_platform_ids)) return -ENODEV; From 3aeb13bc3db2400285eef9002af6e45f93f84d6a Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Tue, 15 Nov 2022 17:27:01 +0300 Subject: [PATCH 0856/1477] cifs: add check for returning value of SMB2_close_init [ Upstream commit d520de6cb42e88a1d008b54f935caf9fc05951da ] If the returning value of SMB2_close_init is an error-value, exit the function. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 352d96f3acc6 ("cifs: multichannel: move channel selection above transport layer") Signed-off-by: Anastasia Belova Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 11efd5289ec4..1cc823e96065 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1266,6 +1266,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, rqst[2].rq_nvec = 1; rc = SMB2_close_init(tcon, server, &rqst[2], COMPOUND_FID, COMPOUND_FID, false); + if (rc) + goto sea_exit; smb2_set_related(&rqst[2]); rc = compound_send_recv(xid, ses, server, From 94822d23310a2718c4214f0a88fabe8a9fe977c6 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Mon, 14 Nov 2022 17:55:49 +0800 Subject: [PATCH 0857/1477] net: ag71xx: call phylink_disconnect_phy if ag71xx_hw_enable() fail in ag71xx_open() [ Upstream commit c9b895c6878bdb6789dc1d7af60fd10f4a9f1937 ] If ag71xx_hw_enable() fails, call phylink_disconnect_phy() to clean up. And if phylink_of_phy_connect() fails, nothing needs to be done. Compile tested only. 
Fixes: 892e09153fa3 ("net: ag71xx: port to phylink") Signed-off-by: Liu Jian Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20221114095549.40342-1-liujian56@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/atheros/ag71xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index c26c9b0c00d8..fe3ca3af431a 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1468,7 +1468,7 @@ static int ag71xx_open(struct net_device *ndev) if (ret) { netif_err(ag, link, ndev, "phylink_of_phy_connect filed with err: %i\n", ret); - goto err; + return ret; } max_frame_len = ag71xx_max_frame_len(ndev->mtu); @@ -1489,6 +1489,7 @@ static int ag71xx_open(struct net_device *ndev) err: ag71xx_rings_cleanup(ag); + phylink_disconnect_phy(ag->phylink); return ret; } From 9f00da9c866d506998bf0a3f699ec900730472da Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 14 Nov 2022 11:05:19 +0000 Subject: [PATCH 0858/1477] net/x25: Fix skb leak in x25_lapb_receive_frame() [ Upstream commit 2929cceb2fcf0ded7182562e4888afafece82cce ] x25_lapb_receive_frame() using skb_copy() to get a private copy of skb, the new skb should be freed in the undersized/fragmented skb error handling path. Otherwise there is a memory leak. 
Fixes: cb101ed2c3c7 ("x25: Handle undersized/fragmented skbs") Signed-off-by: Wei Yongjun Acked-by: Martin Schiller Link: https://lore.kernel.org/r/20221114110519.514538-1-weiyongjun@huaweicloud.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/x25/x25_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 25bf72ee6cad..226397add422 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -117,7 +117,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, if (!pskb_may_pull(skb, 1)) { x25_neigh_put(nb); - return 0; + goto drop; } switch (skb->data[0]) { From e13ef43813ebf0584488df98e21eaa049f920d3d Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 15 Nov 2022 18:39:34 +0800 Subject: [PATCH 0859/1477] cifs: Fix wrong return value checking when GETFLAGS [ Upstream commit 92bbd67a55fee50743b42825d1c016e7fd5c79f9 ] The return value of CIFSGetExtAttr is negative, should be checked with -EOPNOTSUPP rather than EOPNOTSUPP. 
Fixes: 64a5cfa6db94 ("Allow setting per-file compression via SMB2/3") Signed-off-by: Zhang Xiaoxu Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index dcde44ff6cf9..e45598b62242 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -193,7 +193,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = put_user(ExtAttrBits & FS_FL_USER_VISIBLE, (int __user *)arg); - if (rc != EOPNOTSUPP) + if (rc != -EOPNOTSUPP) break; } #endif /* CONFIG_CIFS_POSIX */ @@ -222,7 +222,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) * pSMBFile->fid.netfid, * extAttrBits, * &ExtAttrMask); - * if (rc != EOPNOTSUPP) + * if (rc != -EOPNOTSUPP) * break; */ From 0aeb0de528eccc2b7ff487390a68d2a2bb1083ba Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 14 Nov 2022 14:22:25 +0000 Subject: [PATCH 0860/1477] net: thunderbolt: Fix error handling in tbnet_init() [ Upstream commit f524b7289bbb0c8ffaa2ba3c34c146e43da54fb2 ] A problem about insmod thunderbolt-net failed is triggered with following log given while lsmod does not show thunderbolt_net: insmod: ERROR: could not insert module thunderbolt-net.ko: File exists The reason is that tbnet_init() returns tb_register_service_driver() directly without checking its return value, if tb_register_service_driver() failed, it returns without removing property directory, resulting the property directory can never be created later. tbnet_init() tb_register_property_dir() # register property directory tb_register_service_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without remove property directory Fix by remove property directory when tb_register_service_driver() returns error. 
Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable") Signed-off-by: Yuan Can Acked-by: Mika Westerberg Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/thunderbolt.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index 3160443ef3b9..5d96dc1b00b3 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -1343,12 +1343,21 @@ static int __init tbnet_init(void) TBNET_MATCH_FRAGS_ID); ret = tb_register_property_dir("network", tbnet_dir); - if (ret) { - tb_property_free_dir(tbnet_dir); - return ret; - } + if (ret) + goto err_free_dir; - return tb_register_service_driver(&tbnet_driver); + ret = tb_register_service_driver(&tbnet_driver); + if (ret) + goto err_unregister; + + return 0; + +err_unregister: + tb_unregister_property_dir("network", tbnet_dir); +err_free_dir: + tb_property_free_dir(tbnet_dir); + + return ret; } module_init(tbnet_init); From 5fc19c83132042b6c6a35cd66be4fdf61711760d Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Wed, 16 Nov 2022 17:10:27 +0300 Subject: [PATCH 0861/1477] cifs: add check for returning value of SMB2_set_info_init [ Upstream commit a51e5d293dd1c2e7bf6f7be788466cd9b5d280fb ] If the returning value of SMB2_set_info_init is an error-value, exit the function. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Fixes: 0967e5457954 ("cifs: use a compound for setting an xattr") Signed-off-by: Anastasia Belova Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 1cc823e96065..72368b656b33 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1256,6 +1256,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); + if (rc) + goto sea_exit; smb2_set_next_command(tcon, &rqst[1]); smb2_set_related(&rqst[1]); From 9569eed79bc0c0da0bc9946bede33e3587bd1fb6 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Wed, 9 Nov 2022 09:44:32 +0000 Subject: [PATCH 0862/1477] ftrace: Fix the possible incorrect kernel message commit 08948caebe93482db1adfd2154eba124f66d161d upstream. If the number of mcount entries is an integer multiple of ENTRIES_PER_PAGE, the page count showing on the console would be wrong. 
Link: https://lkml.kernel.org/r/20221109094434.84046-2-wangwensheng4@huawei.com Cc: Cc: Cc: stable@vger.kernel.org Fixes: 5821e1b74f0d0 ("function tracing: fix wrong pos computing when read buffer has been fulfilled") Signed-off-by: Wang Wensheng Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8e9ef0f55596..9758bb9cf442 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6877,7 +6877,7 @@ void __init ftrace_init(void) } pr_info("ftrace: allocating %ld entries in %ld pages\n", - count, count / ENTRIES_PER_PAGE + 1); + count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); last_ftrace_enabled = ftrace_enabled = 1; From 6ed60c60ec9050345f024b7e9c7c0db6e8cd31d2 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Wed, 9 Nov 2022 09:44:33 +0000 Subject: [PATCH 0863/1477] ftrace: Optimize the allocation for mcount entries commit bcea02b096333dc74af987cb9685a4dbdd820840 upstream. If we can't allocate this size, try something smaller with half of the size. Its order should be decreased by one instead of divided by two. 
Link: https://lkml.kernel.org/r/20221109094434.84046-3-wangwensheng4@huawei.com Cc: Cc: Cc: stable@vger.kernel.org Fixes: a79008755497d ("ftrace: Allocate the mcount record pages as groups") Signed-off-by: Wang Wensheng Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9758bb9cf442..42217126586c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3178,7 +3178,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) /* if we can't allocate this size, try something smaller */ if (!order) return -ENOMEM; - order >>= 1; + order--; goto again; } From 6a14828caddad0d989495a72af678adf60992704 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 16 Nov 2022 09:52:07 +0800 Subject: [PATCH 0864/1477] ftrace: Fix null pointer dereference in ftrace_add_mod() commit 19ba6c8af9382c4c05dc6a0a79af3013b9a35cd0 upstream. The @ftrace_mod is allocated by kzalloc(), so both the members {prev,next} of @ftrace_mode->list are NULL, it's not a valid state to call list_del(). If kstrdup() for @ftrace_mod->{func|module} fails, it goes to @out_free tag and calls free_ftrace_mod() to destroy @ftrace_mod, then list_del() will write prev->next and next->prev, where null pointer dereference happens. BUG: kernel NULL pointer dereference, address: 0000000000000008 Oops: 0002 [#1] PREEMPT SMP NOPTI Call Trace: ftrace_mod_callback+0x20d/0x220 ? do_filp_open+0xd9/0x140 ftrace_process_regex.isra.51+0xbf/0x130 ftrace_regex_write.isra.52.part.53+0x6e/0x90 vfs_write+0xee/0x3a0 ? __audit_filter_op+0xb1/0x100 ? auditd_test_task+0x38/0x50 ksys_write+0xa5/0xe0 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Kernel panic - not syncing: Fatal exception So call INIT_LIST_HEAD() to initialize the list member to fix this issue. 
Link: https://lkml.kernel.org/r/20221116015207.30858-1-xiujianfeng@huawei.com Cc: stable@vger.kernel.org Fixes: 673feb9d76ab ("ftrace: Add :mod: caching infrastructure to trace_array") Signed-off-by: Xiu Jianfeng Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 42217126586c..d97c189695cb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1295,6 +1295,7 @@ static int ftrace_add_mod(struct trace_array *tr, if (!ftrace_mod) return -ENOMEM; + INIT_LIST_HEAD(&ftrace_mod->list); ftrace_mod->func = kstrdup(func, GFP_KERNEL); ftrace_mod->module = kstrdup(module, GFP_KERNEL); ftrace_mod->enable = enable; From 5fdebbeca5dbc2cde6cca9562fc02841e1155c9d Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Mon, 14 Nov 2022 17:31:29 +0300 Subject: [PATCH 0865/1477] ring_buffer: Do not deactivate non-existant pages commit 56f4ca0a79a9f1af98f26c54b9b89ba1f9bcc6bd upstream. rb_head_page_deactivate() expects cpu_buffer to contain a valid list of ->pages, so verify that the list is actually present before calling it. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. 
Link: https://lkml.kernel.org/r/20221114143129.3534443-1-d-tatianin@yandex-team.ru Cc: stable@vger.kernel.org Fixes: 77ae365eca895 ("ring-buffer: make lockless") Signed-off-by: Daniil Tatianin Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a12e27815555..1e9de3d49701 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1635,9 +1635,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) free_buffer_page(cpu_buffer->reader_page); - rb_head_page_deactivate(cpu_buffer); - if (head) { + rb_head_page_deactivate(cpu_buffer); + list_for_each_entry_safe(bpage, tmp, head, list) { list_del_init(&bpage->list); free_buffer_page(bpage); From 5d4cc7bc1a8d8b05b01800688f0e82781986b905 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 20 Oct 2022 23:14:27 -0400 Subject: [PATCH 0866/1477] tracing/ring-buffer: Have polling block on watermark commit 42fb0a1e84ff525ebe560e2baf9451ab69127e2b upstream. Currently the way polling works on the ring buffer is broken. It will return immediately if there's any data in the ring buffer whereas a read will block until the watermark (defined by the tracefs buffer_percent file) is hit. That is, a select() or poll() will return as if there's data available, but then the following read will block. This is broken for the way select()s and poll()s are supposed to work. Have the polling on the ring buffer also block the same way reads and splice does on the ring buffer. 
Link: https://lkml.kernel.org/r/20221020231427.41be3f26@gandalf.local.home Cc: Linux Trace Kernel Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Primiano Tucci Cc: stable@vger.kernel.org Fixes: 1e0d6714aceb7 ("ring-buffer: Do not wake up a splice waiter when page is not full") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- include/linux/ring_buffer.h | 2 +- kernel/trace/ring_buffer.c | 55 ++++++++++++++++++++++++------------- kernel/trace/trace.c | 2 +- 3 files changed, 38 insertions(+), 21 deletions(-) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index c9237d30c29b..7d5a78f49d43 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -99,7 +99,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, - struct file *filp, poll_table *poll_table); + struct file *filp, poll_table *poll_table, int full); void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); #define RING_BUFFER_ALL_CPUS -1 diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1e9de3d49701..9a1f82cafad4 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -784,6 +784,21 @@ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) return cnt - read; } +static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + size_t nr_pages; + size_t dirty; + + nr_pages = cpu_buffer->nr_pages; + if (!nr_pages || !full) + return true; + + dirty = ring_buffer_nr_dirty_pages(buffer, cpu); + + return (dirty * 100) > (full * nr_pages); +} + /* * rb_wake_up_waiters - wake up tasks waiting for ring buffer input * @@ -912,22 +927,20 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) 
!ring_buffer_empty_cpu(buffer, cpu)) { unsigned long flags; bool pagebusy; - size_t nr_pages; - size_t dirty; + bool done; if (!full) break; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; - nr_pages = cpu_buffer->nr_pages; - dirty = ring_buffer_nr_dirty_pages(buffer, cpu); + done = !pagebusy && full_hit(buffer, cpu, full); + if (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (!pagebusy && - (!nr_pages || (dirty * 100) > full * nr_pages)) + if (done) break; } @@ -953,6 +966,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) * @cpu: the cpu buffer to wait on * @filp: the file descriptor * @poll_table: The poll descriptor + * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the @buffer's cpu buffers. Otherwise @@ -962,14 +976,15 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) * zero otherwise. 
*/ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, - struct file *filp, poll_table *poll_table) + struct file *filp, poll_table *poll_table, int full) { struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *work; - if (cpu == RING_BUFFER_ALL_CPUS) + if (cpu == RING_BUFFER_ALL_CPUS) { work = &buffer->irq_work; - else { + full = 0; + } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return -EINVAL; @@ -977,8 +992,14 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, work = &cpu_buffer->irq_work; } - poll_wait(filp, &work->waiters, poll_table); - work->waiters_pending = true; + if (full) { + poll_wait(filp, &work->full_waiters, poll_table); + work->full_waiters_pending = true; + } else { + poll_wait(filp, &work->waiters, poll_table); + work->waiters_pending = true; + } + /* * There's a tight race between setting the waiters_pending and * checking if the ring buffer is empty. Once the waiters_pending bit @@ -994,6 +1015,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, */ smp_mb(); + if (full) + return full_hit(buffer, cpu, full) ? 
EPOLLIN | EPOLLRDNORM : 0; + if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) return EPOLLIN | EPOLLRDNORM; @@ -3033,10 +3057,6 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, static __always_inline void rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) { - size_t nr_pages; - size_t dirty; - size_t full; - if (buffer->irq_work.waiters_pending) { buffer->irq_work.waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ @@ -3060,10 +3080,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched); - full = cpu_buffer->shortest_full; - nr_pages = cpu_buffer->nr_pages; - dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu); - if (full && nr_pages && (dirty * 100) <= full * nr_pages) + if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full)) return; cpu_buffer->irq_work.wakeup_full = true; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b7cb9147f0c5..146771d6d007 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6263,7 +6263,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl return EPOLLIN | EPOLLRDNORM; else return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, - filp, poll_table); + filp, poll_table, iter->tr->buffer_percent); } static __poll_t From 65ba7e7c241122ef0a9e61d1920f2ae9689aa796 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 17 Nov 2022 09:23:45 +0800 Subject: [PATCH 0867/1477] tracing: Fix memory leak in test_gen_synth_cmd() and test_empty_synth_event() commit a4527fef9afe5c903c718d0cd24609fe9c754250 upstream. test_gen_synth_cmd() only free buf in fail path, hence buf will leak when there is no failure. Add kfree(buf) to prevent the memleak. 
The same reason and solution in test_empty_synth_event(). unreferenced object 0xffff8881127de000 (size 2048): comm "modprobe", pid 247, jiffies 4294972316 (age 78.756s) hex dump (first 32 bytes): 20 67 65 6e 5f 73 79 6e 74 68 5f 74 65 73 74 20 gen_synth_test 20 70 69 64 5f 74 20 6e 65 78 74 5f 70 69 64 5f pid_t next_pid_ backtrace: [<000000004254801a>] kmalloc_trace+0x26/0x100 [<0000000039eb1cf5>] 0xffffffffa00083cd [<000000000e8c3bc8>] 0xffffffffa00086ba [<00000000c293d1ea>] do_one_initcall+0xdb/0x480 [<00000000aa189e6d>] do_init_module+0x1cf/0x680 [<00000000d513222b>] load_module+0x6a50/0x70a0 [<000000001fd4d529>] __do_sys_finit_module+0x12f/0x1c0 [<00000000b36c4c0f>] do_syscall_64+0x3f/0x90 [<00000000bbf20cf3>] entry_SYSCALL_64_after_hwframe+0x63/0xcd unreferenced object 0xffff8881127df000 (size 2048): comm "modprobe", pid 247, jiffies 4294972324 (age 78.728s) hex dump (first 32 bytes): 20 65 6d 70 74 79 5f 73 79 6e 74 68 5f 74 65 73 empty_synth_tes 74 20 20 70 69 64 5f 74 20 6e 65 78 74 5f 70 69 t pid_t next_pi backtrace: [<000000004254801a>] kmalloc_trace+0x26/0x100 [<00000000d4db9a3d>] 0xffffffffa0008071 [<00000000c31354a5>] 0xffffffffa00086ce [<00000000c293d1ea>] do_one_initcall+0xdb/0x480 [<00000000aa189e6d>] do_init_module+0x1cf/0x680 [<00000000d513222b>] load_module+0x6a50/0x70a0 [<000000001fd4d529>] __do_sys_finit_module+0x12f/0x1c0 [<00000000b36c4c0f>] do_syscall_64+0x3f/0x90 [<00000000bbf20cf3>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://lkml.kernel.org/r/20221117012346.22647-2-shangxiaojing@huawei.com Cc: Cc: Cc: Cc: stable@vger.kernel.org Fixes: 9fe41efaca08 ("tracing: Add synth event generation test module") Signed-off-by: Shang XiaoJing Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/synth_event_gen_test.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c index edd912cd14aa..a6a2813afb87 
100644 --- a/kernel/trace/synth_event_gen_test.c +++ b/kernel/trace/synth_event_gen_test.c @@ -120,15 +120,13 @@ static int __init test_gen_synth_cmd(void) /* Now generate a gen_synth_test event */ ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals)); - out: + free: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ synth_event_delete("gen_synth_test"); - free: - kfree(buf); - - goto out; + goto free; } /* @@ -227,15 +225,13 @@ static int __init test_empty_synth_event(void) /* Now trace an empty_synth_test event */ ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals)); - out: + free: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ synth_event_delete("empty_synth_test"); - free: - kfree(buf); - - goto out; + goto free; } static struct synth_field_desc create_synth_test_fields[] = { From 315b149f08229a233d47532eb5da1707b28f764c Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 17 Nov 2022 09:23:46 +0800 Subject: [PATCH 0868/1477] tracing: Fix wild-memory-access in register_synth_event() commit 1b5f1c34d3f5a664a57a5a7557a50e4e3cc2505c upstream. In register_synth_event(), if set_synth_event_print_fmt() failed, then both trace_remove_event_call() and unregister_trace_event() will be called, which means the trace_event_call will call __unregister_trace_event() twice. As the result, the second unregister will causes the wild-memory-access. register_synth_event set_synth_event_print_fmt failed trace_remove_event_call event_remove if call->event.funcs then __unregister_trace_event (first call) unregister_trace_event __unregister_trace_event (second call) Fix the bug by avoiding to call the second __unregister_trace_event() by checking if the first one is called. 
general protection fault, probably for non-canonical address 0xfbd59c0000000024: 0000 [#1] SMP KASAN PTI KASAN: maybe wild-memory-access in range [0xdead000000000120-0xdead000000000127] CPU: 0 PID: 3807 Comm: modprobe Not tainted 6.1.0-rc1-00186-g76f33a7eedb4 #299 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 RIP: 0010:unregister_trace_event+0x6e/0x280 Code: 00 fc ff df 4c 89 ea 48 c1 ea 03 80 3c 02 00 0f 85 0e 02 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8b 63 08 4c 89 e2 48 c1 ea 03 <80> 3c 02 00 0f 85 e2 01 00 00 49 89 2c 24 48 85 ed 74 28 e8 7a 9b RSP: 0018:ffff88810413f370 EFLAGS: 00010a06 RAX: dffffc0000000000 RBX: ffff888105d050b0 RCX: 0000000000000000 RDX: 1bd5a00000000024 RSI: ffff888119e276e0 RDI: ffffffff835a8b20 RBP: dead000000000100 R08: 0000000000000000 R09: fffffbfff0913481 R10: ffffffff8489a407 R11: fffffbfff0913480 R12: dead000000000122 R13: ffff888105d050b8 R14: 0000000000000000 R15: ffff888105d05028 FS: 00007f7823e8d540(0000) GS:ffff888119e00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7823e7ebec CR3: 000000010a058002 CR4: 0000000000330ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __create_synth_event+0x1e37/0x1eb0 create_or_delete_synth_event+0x110/0x250 synth_event_run_command+0x2f/0x110 test_gen_synth_cmd+0x170/0x2eb [synth_event_gen_test] synth_event_gen_test_init+0x76/0x9bc [synth_event_gen_test] do_one_initcall+0xdb/0x480 do_init_module+0x1cf/0x680 load_module+0x6a50/0x70a0 __do_sys_finit_module+0x12f/0x1c0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://lkml.kernel.org/r/20221117012346.22647-3-shangxiaojing@huawei.com Fixes: 4b147936fa50 ("tracing: Add support for 'synthetic' events") Signed-off-by: Shang XiaoJing Cc: stable@vger.kernel.org Cc: Cc: Cc: Signed-off-by: Steven Rostedt (Google) 
Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_synth.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 881df991742a..18291ab35657 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -791,10 +791,9 @@ static int register_synth_event(struct synth_event *event) } ret = set_synth_event_print_fmt(call); - if (ret < 0) { + /* unregister_trace_event() will be called inside */ + if (ret < 0) trace_remove_event_call(call); - goto err; - } out: return ret; err: From bb70fcae4115d24b7e8cee17a6da8b1943f546bb Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Fri, 18 Nov 2022 10:15:33 +0900 Subject: [PATCH 0869/1477] tracing: kprobe: Fix potential null-ptr-deref on trace_event_file in kprobe_event_gen_test_exit() commit e0d75267f59d7084e0468bd68beeb1bf9c71d7c0 upstream. When trace_get_event_file() failed, gen_kretprobe_test will be assigned as the error code. If module kprobe_event_gen_test is removed now, the null pointer dereference will happen in kprobe_event_gen_test_exit(). Check if gen_kprobe_test or gen_kretprobe_test is error code or NULL before dereference them. BUG: kernel NULL pointer dereference, address: 0000000000000012 PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 2210 Comm: modprobe Not tainted 6.1.0-rc1-00171-g2159299a3b74-dirty #217 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 RIP: 0010:kprobe_event_gen_test_exit+0x1c/0xb5 [kprobe_event_gen_test] Code: Unable to access opcode bytes at 0xffffffff9ffffff2. 
RSP: 0018:ffffc900015bfeb8 EFLAGS: 00010246 RAX: ffffffffffffffea RBX: ffffffffa0002080 RCX: 0000000000000000 RDX: ffffffffa0001054 RSI: ffffffffa0001064 RDI: ffffffffdfc6349c RBP: ffffffffa0000000 R08: 0000000000000004 R09: 00000000001e95c0 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000800 R13: ffffffffa0002420 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f56b75be540(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffff9ffffff2 CR3: 000000010874a006 CR4: 0000000000330ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __x64_sys_delete_module+0x206/0x380 ? lockdep_hardirqs_on_prepare+0xd8/0x190 ? syscall_enter_from_user_mode+0x1c/0x50 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://lore.kernel.org/all/20221108015130.28326-2-shangxiaojing@huawei.com/ Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module") Signed-off-by: Shang XiaoJing Acked-by: Masami Hiramatsu (Google) Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/kprobe_event_gen_test.c | 44 ++++++++++++++++++---------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index d81f7c51025c..1c98fafcf333 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -73,6 +73,10 @@ static struct trace_event_file *gen_kretprobe_test; #define KPROBE_GEN_TEST_ARG3 NULL #endif +static bool trace_event_file_is_valid(struct trace_event_file *input) +{ + return input && !IS_ERR(input); +} /* * Test to make sure we can create a kprobe event, then add more @@ -217,10 +221,12 @@ static int __init kprobe_event_gen_test_init(void) ret = test_gen_kretprobe_cmd(); if (ret) { - 
WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, - "kprobes", - "gen_kretprobe_test", false)); - trace_put_event_file(gen_kretprobe_test); + if (trace_event_file_is_valid(gen_kretprobe_test)) { + WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + trace_put_event_file(gen_kretprobe_test); + } WARN_ON(kprobe_event_delete("gen_kretprobe_test")); } @@ -229,24 +235,30 @@ static int __init kprobe_event_gen_test_init(void) static void __exit kprobe_event_gen_test_exit(void) { - /* Disable the event or you can't remove it */ - WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, - "kprobes", - "gen_kprobe_test", false)); + if (trace_event_file_is_valid(gen_kprobe_test)) { + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", + "gen_kprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kprobe_test); + } - /* Now give the file and instance back */ - trace_put_event_file(gen_kprobe_test); /* Now unregister and free the event */ WARN_ON(kprobe_event_delete("gen_kprobe_test")); - /* Disable the event or you can't remove it */ - WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, - "kprobes", - "gen_kretprobe_test", false)); + if (trace_event_file_is_valid(gen_kretprobe_test)) { + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kretprobe_test); + } - /* Now give the file and instance back */ - trace_put_event_file(gen_kretprobe_test); /* Now unregister and free the event */ WARN_ON(kprobe_event_delete("gen_kretprobe_test")); From 28a54854a95923b6266a9479ad660ca2cc0e1d5f Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Fri, 18 Nov 2022 10:15:34 +0900 Subject: [PATCH 0870/1477] tracing: kprobe: Fix potential null-ptr-deref on 
trace_array in kprobe_event_gen_test_exit() commit 22ea4ca9631eb137e64e5ab899e9c89cb6670959 upstream. When test_gen_kprobe_cmd() failed after kprobe_event_gen_cmd_end(), it will goto delete, which will call kprobe_event_delete() and release the corresponding resource. However, the trace_array in gen_kretprobe_test will point to the invalid resource. Set gen_kretprobe_test to NULL after called kprobe_event_delete() to prevent null-ptr-deref. BUG: kernel NULL pointer dereference, address: 0000000000000070 PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 246 Comm: modprobe Tainted: G W 6.1.0-rc1-00174-g9522dc5c87da-dirty #248 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 RIP: 0010:__ftrace_set_clr_event_nolock+0x53/0x1b0 Code: e8 82 26 fc ff 49 8b 1e c7 44 24 0c ea ff ff ff 49 39 de 0f 84 3c 01 00 00 c7 44 24 18 00 00 00 00 e8 61 26 fc ff 48 8b 6b 10 <44> 8b 65 70 4c 8b 6d 18 41 f7 c4 00 02 00 00 75 2f RSP: 0018:ffffc9000159fe00 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff88810971d268 RCX: 0000000000000000 RDX: ffff8881080be600 RSI: ffffffff811b48ff RDI: ffff88810971d058 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000001 R10: ffffc9000159fe58 R11: 0000000000000001 R12: ffffffffa0001064 R13: ffffffffa000106c R14: ffff88810971d238 R15: 0000000000000000 FS: 00007f89eeff6540(0000) GS:ffff88813b600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000070 CR3: 000000010599e004 CR4: 0000000000330ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __ftrace_set_clr_event+0x3e/0x60 trace_array_set_clr_event+0x35/0x50 ? 0xffffffffa0000000 kprobe_event_gen_test_exit+0xcd/0x10b [kprobe_event_gen_test] __x64_sys_delete_module+0x206/0x380 ? lockdep_hardirqs_on_prepare+0xd8/0x190 ? 
syscall_enter_from_user_mode+0x1c/0x50 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f89eeb061b7 Link: https://lore.kernel.org/all/20221108015130.28326-3-shangxiaojing@huawei.com/ Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module") Signed-off-by: Shang XiaoJing Cc: stable@vger.kernel.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/kprobe_event_gen_test.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index 1c98fafcf333..c736487fc0e4 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -143,6 +143,8 @@ static int __init test_gen_kprobe_cmd(void) kfree(buf); return ret; delete: + if (trace_event_file_is_valid(gen_kprobe_test)) + gen_kprobe_test = NULL; /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kprobe_test"); goto out; @@ -206,6 +208,8 @@ static int __init test_gen_kretprobe_cmd(void) kfree(buf); return ret; delete: + if (trace_event_file_is_valid(gen_kretprobe_test)) + gen_kretprobe_test = NULL; /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kretprobe_test"); goto out; From a80369c8ca50bc885d14386087a834659ec54a54 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 12 Nov 2022 15:12:23 +0100 Subject: [PATCH 0871/1477] ALSA: usb-audio: Drop snd_BUG_ON() from snd_usbmidi_output_open() commit ad72c3c3f6eb81d2cb189ec71e888316adada5df upstream. snd_usbmidi_output_open() has a check of the NULL port with snd_BUG_ON(). snd_BUG_ON() was used as this shouldn't have happened, but in reality, the NULL port may be seen when the device gives an invalid endpoint setup at the descriptor, hence the driver skips the allocation. That is, the check itself is valid and snd_BUG_ON() should be dropped from there. 
Otherwise it's confusing as if it were a real bug, as recently syzbot stumbled on it. Reported-by: syzbot+9abda841d636d86c41da@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/syzbot+9abda841d636d86c41da@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20221112141223.6144-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/midi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 93fee6e365a6..b02e1a33304f 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1149,10 +1149,8 @@ static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream) port = &umidi->endpoints[i].out->ports[j]; break; } - if (!port) { - snd_BUG(); + if (!port) return -ENXIO; - } substream->runtime->private_data = port; port->state = STATE_UNKNOWN; From c7dcc8948279f0d1e5610fbf1b2411ea88bb5cbb Mon Sep 17 00:00:00 2001 From: Emil Flink Date: Tue, 15 Nov 2022 15:45:01 +0100 Subject: [PATCH 0872/1477] ALSA: hda/realtek: fix speakers for Samsung Galaxy Book Pro commit b18a456330e1c1ca207b57b45872f10336741388 upstream. The Samsung Galaxy Book Pro seems to have the same issue as a few other Samsung laptops, detailed in kernel bug report 207423. Sound from headphone jack works, but not the built-in speakers. 
alsa-info: http://alsa-project.org/db/?f=b40ba609dc6ae28dc84ad404a0d8a4bbcd8bea6d Signed-off-by: Emil Flink Cc: Link: https://lore.kernel.org/r/20221115144500.7782-1-emil.flink@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d427e8939a0e..39ed1462442f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9007,6 +9007,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), From 100d1e53bb3bea5ec8efaed995b5082032b5e9ab Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Nov 2022 18:02:35 +0100 Subject: [PATCH 0873/1477] ALSA: hda/realtek: Fix the speaker output on Samsung Galaxy Book Pro 360 commit 1abfd71ee8f3ed99c5d0df5d9843a360541d6808 upstream. Samsung Galaxy Book Pro 360 (13" 2021 NP930QBD-ke1US) with codec SSID 144d:c1a6 requires the same workaround for enabling the speaker amp like other Samsung models with ALC298 codec. 
Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1205100 Cc: Link: https://lore.kernel.org/r/20221115170235.18875-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 39ed1462442f..8011b451902a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9008,6 +9008,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc1a6, "Samsung Galaxy Book Pro 360 (NP930QBD)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), From 15155f7c0e302f9cbf9f0c00cfa4812905a300e7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2022 15:46:48 +0100 Subject: [PATCH 0874/1477] Revert "usb: dwc3: disable USB core PHY management" commit 5c294de36e7fb3e0cba0c4e1ef9a5f57bc080d0f upstream. This reverts commit 6000b8d900cd5f52fbcd0776d0cc396e88c8c2ea. The offending commit disabled the USB core PHY management as the dwc3 already manages the PHYs in question. Unfortunately some platforms have started relying on having USB core also controlling the PHY and this is specifically currently needed on some Exynos platforms for PHY calibration or connected device may fail to enumerate. 
The PHY calibration was previously handled in the dwc3 driver, but to work around some issues related to how the dwc3 driver interacts with xhci (e.g. using multiple drivers) this was moved to USB core by commits 34c7ed72f4f0 ("usb: core: phy: add support for PHY calibration") and a0a465569b45 ("usb: dwc3: remove generic PHY calibrate() calls"). The same PHY obviously should not be controlled from two different places, which for example do not agree on the PHY mode or power state during suspend, but as the offending patch was backported to stable, let's revert it for now. Reported-by: Stefan Agner Link: https://lore.kernel.org/lkml/808bdba846bb60456adf10a3016911ee@agner.ch/ Fixes: 6000b8d900cd ("usb: dwc3: disable USB core PHY management") Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Tested-by: Marek Szyprowski Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20221103144648.14197-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/host.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index 86bc2bec9038..b06ab85f8187 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -10,13 +10,8 @@ #include #include -#include "../host/xhci-plat.h" #include "core.h" -static const struct xhci_plat_priv dwc3_xhci_plat_priv = { - .quirks = XHCI_SKIP_PHY_INIT, -}; - static int dwc3_host_get_irq(struct dwc3 *dwc) { struct platform_device *dwc3_pdev = to_platform_device(dwc->dev); @@ -92,11 +87,6 @@ int dwc3_host_init(struct dwc3 *dwc) goto err; } - ret = platform_device_add_data(xhci, &dwc3_xhci_plat_priv, - sizeof(dwc3_xhci_plat_priv)); - if (ret) - goto err; - memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props)); if (dwc->usb3_lpm_capable) From 8cbaf4ed530e2464ff3c7d3abd432b1486bbee77 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 29 Sep 2022 18:52:02 +0200 Subject: [PATCH 0875/1477] slimbus: stream: correct presence rate
frequencies commit b9c1939627f8185dec8ba6d741e9573a4c7a5834 upstream. Correct few frequencies in presence rate table - multiplied by 10 (110250 instead of 11025 Hz). Fixes: abb9c9b8b51b ("slimbus: stream: add stream support") Cc: Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220929165202.410937-1-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/stream.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/slimbus/stream.c b/drivers/slimbus/stream.c index 75f87b3d8b95..73a2aa362957 100644 --- a/drivers/slimbus/stream.c +++ b/drivers/slimbus/stream.c @@ -67,10 +67,10 @@ static const int slim_presence_rate_table[] = { 384000, 768000, 0, /* Reserved */ - 110250, - 220500, - 441000, - 882000, + 11025, + 22050, + 44100, + 88200, 176400, 352800, 705600, From eb3af3ea5bcabee193ce31e08fedf55cc3d20b9f Mon Sep 17 00:00:00 2001 From: Mushahid Hussain Date: Mon, 10 Oct 2022 21:57:20 +0500 Subject: [PATCH 0876/1477] speakup: fix a segfault caused by switching consoles commit 0fc801f8018000c8e64a275a20cb1da7c54e46df upstream. This patch fixes a segfault by adding a null check on synth in speakup_con_update(). The segfault can be reproduced as follows: - Login into a text console - Load speakup and speakup_soft modules - Remove speakup_soft - Switch to a graphics console This is caused by lack of a null check on `synth` in speakup_con_update(). Here's the sequence that causes the segfault: - When we remove the speakup_soft, synth_release() sets the synth to null. - After that, when we change the virtual console to graphics console, vt_notifier_call() is fired, which then calls speakup_con_update(). - Inside speakup_con_update() there's no null check on synth, so it calls synth_printf(). - Inside synth_printf(), synth_buffer_add() and synth_start(), both access synth, when it is null and causing a segfault. Therefore adding a null check on synth solves the issue. 
Fixes: 2610df41489f ("staging: speakup: Add pause command used on switching to graphical mode") Cc: stable Signed-off-by: Mushahid Hussain Signed-off-by: Samuel Thibault Link: https://lore.kernel.org/r/20221010165720.397042-1-mushi.shar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index 48019660a096..63c5444f0f1a 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -1780,7 +1780,7 @@ static void speakup_con_update(struct vc_data *vc) { unsigned long flags; - if (!speakup_console[vc->vc_num] || spk_parked) + if (!speakup_console[vc->vc_num] || spk_parked || !synth) return; if (!spin_trylock_irqsave(&speakup_info.spinlock, flags)) /* Speakup output, discard */ From 25c652811ddd99384c6120f22195333174a16de8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 7 Nov 2022 10:07:53 +0100 Subject: [PATCH 0877/1477] USB: bcma: Make GPIO explicitly optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cd136706b4f925aa5d316642543babac90d45910 upstream. What the code does is to not check the return value from devm_gpiod_get() and then avoid using an erroneous GPIO descriptor with IS_ERR_OR_NULL(). This will miss real errors from the GPIO core that should not be ignored, such as probe deferral. Instead request the GPIO as explicitly optional, which means that if it doesn't exist, the descriptor returned will be NULL. Then we can add error handling and also avoid just doing this on the device tree path, and simplify the site where the optional GPIO descriptor is used. There were some problems with cleaning up this GPIO descriptor use in the past, but this is the proper way to deal with it. 
Cc: Rafał Miłecki Cc: Chuhong Yuan Signed-off-by: Linus Walleij Cc: stable Link: https://lore.kernel.org/r/20221107090753.1404679-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/bcma-hcd.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/bcma-hcd.c b/drivers/usb/host/bcma-hcd.c index 2df52f75f6b3..7558cc4d90cc 100644 --- a/drivers/usb/host/bcma-hcd.c +++ b/drivers/usb/host/bcma-hcd.c @@ -285,7 +285,7 @@ static void bcma_hci_platform_power_gpio(struct bcma_device *dev, bool val) { struct bcma_hcd_device *usb_dev = bcma_get_drvdata(dev); - if (IS_ERR_OR_NULL(usb_dev->gpio_desc)) + if (!usb_dev->gpio_desc) return; gpiod_set_value(usb_dev->gpio_desc, val); @@ -406,9 +406,11 @@ static int bcma_hcd_probe(struct bcma_device *core) return -ENOMEM; usb_dev->core = core; - if (core->dev.of_node) - usb_dev->gpio_desc = devm_gpiod_get(&core->dev, "vcc", - GPIOD_OUT_HIGH); + usb_dev->gpio_desc = devm_gpiod_get_optional(&core->dev, "vcc", + GPIOD_OUT_HIGH); + if (IS_ERR(usb_dev->gpio_desc)) + return dev_err_probe(&core->dev, PTR_ERR(usb_dev->gpio_desc), + "error obtaining VCC GPIO"); switch (core->id.id) { case BCMA_CORE_USB20_HOST: From 878227a3ddb23f26f38fb9e5d460f9f376a44f61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Monin?= Date: Thu, 13 Oct 2022 16:26:48 +0200 Subject: [PATCH 0878/1477] USB: serial: option: add Sierra Wireless EM9191 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit df3414b0a245f43476061fddd78cee7d6cff797f upstream. Add support for the AT and diag ports, similar to other qualcomm SDX55 modems. In QDL mode, the modem uses a different device ID and support is provided by qcserial in commit 11c52d250b34 ("USB: serial: qcserial: add EM9191 QDL support"). 
T: Bus=08 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 3 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=1199 ProdID=90d3 Rev=00.06 S: Manufacturer=Sierra Wireless, Incorporated S: Product=Sierra Wireless EM9191 S: SerialNumber=xxxxxxxxxxxxxxxx C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) Signed-off-by: Benoît Monin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index eea3dd18a044..5dc7b0ead0b5 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -581,6 +581,9 @@ static void option_instat_callback(struct urb *urb); #define OPPO_VENDOR_ID 0x22d9 #define OPPO_PRODUCT_R11 0x276c +/* Sierra Wireless products */ +#define SIERRA_VENDOR_ID 0x1199 +#define SIERRA_PRODUCT_EM9191 0x90d3 /* Device flags */ @@ -2176,6 +2179,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From de707957d9d45054610a5b1b69de96eca9ac755a Mon Sep 17 00:00:00 2001 From: Davide Tronchin Date: Wed, 16 Nov 2022 16:59:48 +0100 Subject: [PATCH 0879/1477] USB: serial: option: remove old LARA-R6 PID 
commit 2ec106b96afc19698ff934323b633c0729d4c7f8 upstream. Remove the UBLOX_PRODUCT_R6XX 0x90fa association since LARA-R6 00B final product uses a new USB composition with different PID. 0x90fa PID used only by LARA-R6 internal prototypes. Move 0x90fa PID directly in the option_ids array since used by other Qualcomm based modem vendors as pointed out in: https://lore.kernel.org/all/6572c4e6-d8bc-b8d3-4396-d879e4e76338@gmail.com Signed-off-by: Davide Tronchin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5dc7b0ead0b5..d69e250481e6 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -240,7 +240,6 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_UC15 0x9090 /* These u-blox products use Qualcomm's vendor ID */ #define UBLOX_PRODUCT_R410M 0x90b2 -#define UBLOX_PRODUCT_R6XX 0x90fa /* These Yuga products use Qualcomm's vendor ID */ #define YUGA_PRODUCT_CLM920_NC5 0x9625 @@ -1127,7 +1126,7 @@ static const struct usb_device_id option_ids[] = { /* u-blox products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M), .driver_info = RSVD(1) | RSVD(3) }, - { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R6XX), + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x90fa), .driver_info = RSVD(3) }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff), From 0e88a3cfa6edcab4bbeab5641d3a74a176a51eeb Mon Sep 17 00:00:00 2001 From: Davide Tronchin Date: Wed, 16 Nov 2022 16:59:49 +0100 Subject: [PATCH 0880/1477] USB: serial: option: add u-blox LARA-R6 00B modem commit d9e37a5c4d80ea25a7171ab8557a449115554e76 upstream. The official LARA-R6 (00B) modem uses 0x908b PID. 
LARA-R6 00B does not implement a QMI interface on port 4, the reservation (RSVD(4)) has been added to meet other companies that implement QMI on that interface. LARA-R6 00B USB composition exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parser/alternative functions Signed-off-by: Davide Tronchin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d69e250481e6..60820aeb2621 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1126,6 +1126,8 @@ static const struct usb_device_id option_ids[] = { /* u-blox products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M), .driver_info = RSVD(1) | RSVD(3) }, + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x908b), /* u-blox LARA-R6 00B */ + .driver_info = RSVD(4) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x90fa), .driver_info = RSVD(3) }, /* Quectel products using Quectel vendor ID */ From 5c44c60358da5a6c92c9be3f7744648db284d9f2 Mon Sep 17 00:00:00 2001 From: Davide Tronchin Date: Wed, 16 Nov 2022 16:59:50 +0100 Subject: [PATCH 0881/1477] USB: serial: option: add u-blox LARA-L6 modem commit c1547f12df8b8e9ca2686accee43213ecd117efe upstream. Add LARA-L6 PIDs for three different USB compositions. 
LARA-L6 module can be configured (by AT interface) in three different USB modes: * Default mode (Vendor ID: 0x1546 Product ID: 0x1341) with 4 serial interfaces * RmNet mode (Vendor ID: 0x1546 Product ID: 0x1342) with 4 serial interfaces and 1 RmNet virtual network interface * CDC-ECM mode (Vendor ID: 0x1546 Product ID: 0x1343) with 4 serial interfaces and 1 CDC-ECM virtual network interface In default mode LARA-L6 exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parser/alternative functions In RmNet mode LARA-L6 exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parser/alternative functions If 4: RMNET interface In CDC-ECM mode LARA-L6 exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parser/alternative functions If 4: CDC-ECM interface Signed-off-by: Davide Tronchin [ johan: drop PID defines in favour of comments ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 60820aeb2621..fc908e47b7ef 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -162,6 +162,8 @@ static void option_instat_callback(struct urb *urb); #define NOVATELWIRELESS_PRODUCT_G2 0xA010 #define NOVATELWIRELESS_PRODUCT_MC551 0xB001 +#define UBLOX_VENDOR_ID 0x1546 + /* AMOI PRODUCTS */ #define AMOI_VENDOR_ID 0x1614 #define AMOI_PRODUCT_H01 0x0800 @@ -1130,6 +1132,12 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x90fa), .driver_info = RSVD(3) }, + /* u-blox products */ + { USB_DEVICE(UBLOX_VENDOR_ID, 0x1341) }, /* u-blox LARA-L6 */ + { USB_DEVICE(UBLOX_VENDOR_ID, 0x1342), /* u-blox LARA-L6 (RMNET) */ + .driver_info = RSVD(4) }, + { USB_DEVICE(UBLOX_VENDOR_ID, 0x1343), /* u-blox LARA-L6 (ECM) */ +
.driver_info = RSVD(4) }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, From 249cef723feec8c7a608a97337cb666b86e27624 Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Wed, 9 Nov 2022 22:24:15 +0100 Subject: [PATCH 0882/1477] USB: serial: option: add Fibocom FM160 0x0111 composition commit 148f4b32b4504d8a32cf82049b7b9499a4b299ab upstream. Add support for the following Fibocom FM160 composition: 0x0111: MBIM + MODEM + DIAG + AT T: Bus=01 Lev=02 Prnt=125 Port=01 Cnt=02 Dev#= 93 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0111 Rev= 5.04 S: Manufacturer=Fibocom S: Product=Fibocom FM160 Modem_SN:12345678 S: SerialNumber=12345678 C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=86(I) Atr=03(Int.) 
MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Reinhard Speyerer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index fc908e47b7ef..537ef276c78f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2179,6 +2179,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) }, /* Fibocom MA510 (ECM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ From 143ba5c2d2a7e784eeda34dffaa139fcc8315828 Mon Sep 17 00:00:00 2001 From: Nicolas Dumazet Date: Wed, 9 Nov 2022 13:29:46 +0100 Subject: [PATCH 0883/1477] usb: add NO_LPM quirk for Realforce 87U Keyboard commit 181135bb20dcb184edd89817831b888eb8132741 upstream. Before adding this quirk, this (mechanical keyboard) device would not be recognized, logging: new full-speed USB device number 56 using xhci_hcd unable to read config index 0 descriptor/start: -32 chopping to 0 config(s) It would take dozens of plugging/unpuggling cycles for the keyboard to be recognized. Keyboard seems to simply work after applying this quirk. This issue had been reported by users in two places already ([1], [2]) but nobody tried upstreaming a patch yet. 
After testing I believe their suggested fix (DELAY_INIT + NO_LPM + DEVICE_QUALIFIER) was probably a little overkill. I assume this particular combination was tested because it had been previously suggested in [3], but only NO_LPM seems sufficient for this device. [1]: https://qiita.com/float168/items/fed43d540c8e2201b543 [2]: https://blog.kostic.dev/posts/making-the-realforce-87ub-work-with-usb30-on-Ubuntu/ [3]: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678477 Cc: stable@vger.kernel.org Signed-off-by: Nicolas Dumazet Link: https://lore.kernel.org/r/20221109122946.706036-1-ndumazet@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index eb3ea45d5d13..6d24d138cc77 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -362,6 +362,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, + /* Realforce 87U Keyboard */ + { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM }, + /* M-Systems Flash Disk Pioneers */ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, From efaab055201b4e44824676f4281ba5bfcaa4a588 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Sun, 18 Sep 2022 11:33:12 +0800 Subject: [PATCH 0884/1477] usb: chipidea: fix deadlock in ci_otg_del_timer commit 7a58b8d6021426b796eebfae80983374d9a80a75 upstream. There is a deadlock in ci_otg_del_timer(), the process is shown below: (thread 1) | (thread 2) ci_otg_del_timer() | ci_otg_hrtimer_func() ... | spin_lock_irqsave() //(1) | ... ... | hrtimer_cancel() | spin_lock_irqsave() //(2) (block forever) We hold ci->lock in position (1) and use hrtimer_cancel() to wait ci_otg_hrtimer_func() to stop, but ci_otg_hrtimer_func() also need ci->lock in position (2). 
As a result, the hrtimer_cancel() in ci_otg_del_timer() will be blocked forever. This patch moves hrtimer_cancel() out of the spin_lock_irqsave() protected region so that ci_otg_hrtimer_func() can obtain ci->lock. What's more, no race can happen, because "next_timer" is always under the protection of spin_lock_irqsave() and we only check whether "next_timer" equals NUM_OTG_FSM_TIMERS in the following code. Fixes: 3a316ec4c91c ("usb: chipidea: use hrtimer for otg fsm timers") Cc: stable Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20220918033312.94348-1-duoming@zju.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/otg_fsm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/chipidea/otg_fsm.c b/drivers/usb/chipidea/otg_fsm.c index 6ed4b00dba96..7a2a9559693f 100644 --- a/drivers/usb/chipidea/otg_fsm.c +++ b/drivers/usb/chipidea/otg_fsm.c @@ -256,8 +256,10 @@ static void ci_otg_del_timer(struct ci_hdrc *ci, enum otg_fsm_timer t) ci->enabled_otg_timer_bits &= ~(1 << t); if (ci->next_otg_timer == t) { if (ci->enabled_otg_timer_bits == 0) { + spin_unlock_irqrestore(&ci->lock, flags); /* No enabled timers after delete it */ hrtimer_cancel(&ci->otg_fsm_hrtimer); + spin_lock_irqsave(&ci->lock, flags); ci->next_otg_timer = NUM_OTG_FSM_TIMERS; } else { /* Find the next timer */ From 85cc1a2fd8bf33510aed5fafbf61abe21abaf95a Mon Sep 17 00:00:00 2001 From: Rajat Khandelwal Date: Mon, 24 Oct 2022 22:46:11 +0530 Subject: [PATCH 0885/1477] usb: typec: mux: Enter safe mode only when pins need to be reconfigured commit 40bf8f162d0f95e0716e479d7db41443d931765c upstream. There is no point in entering safe mode during DP/TBT configuration if the DP/TBT was already configured in mux. This is because safe mode is only applicable when there is a need to reconfigure the pins in order to avoid damage within/to port partner. In some chrome systems, IOM/mux is already configured before OS comes up.
Thus, when the driver is probed, it blindly enters safe mode due to PD negotiations, but the IOM will complete safe mode and send an ack to the PMC only after the gfx driver lowers dp_phy_ownership. Since that never happens, we see an IPC timeout. Hence, enter safe mode only when pin reconfiguration is required, which makes sense. Fixes: 43d596e32276 ("usb: typec: intel_pmc_mux: Check the port status before connect") Cc: stable Signed-off-by: Rajat Khandelwal Signed-off-by: Lee Shawn C Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221024171611.181468-1-rajat.khandelwal@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index 80daa70e288b..1276112edeff 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -339,13 +339,24 @@ pmc_usb_mux_usb4(struct pmc_usb_port *port, struct typec_mux_state *state) return pmc_usb_command(port, (void *)&req, sizeof(req)); } -static int pmc_usb_mux_safe_state(struct pmc_usb_port *port) +static int pmc_usb_mux_safe_state(struct pmc_usb_port *port, + struct typec_mux_state *state) { u8 msg; if (IOM_PORT_ACTIVITY_IS(port->iom_status, SAFE_MODE)) return 0; + if ((IOM_PORT_ACTIVITY_IS(port->iom_status, DP) || + IOM_PORT_ACTIVITY_IS(port->iom_status, DP_MFD)) && + state->alt && state->alt->svid == USB_TYPEC_DP_SID) + return 0; + + if ((IOM_PORT_ACTIVITY_IS(port->iom_status, TBT) || + IOM_PORT_ACTIVITY_IS(port->iom_status, ALT_MODE_TBT_USB)) && + state->alt && state->alt->svid == USB_TYPEC_TBT_SID) + return 0; + msg = PMC_USB_SAFE_MODE; msg |= port->usb3_port << PMC_USB_MSG_USB3_PORT_SHIFT; @@ -413,7 +424,7 @@ pmc_usb_mux_set(struct typec_mux *mux, struct typec_mux_state *state) return 0; if (state->mode == TYPEC_STATE_SAFE) - return pmc_usb_mux_safe_state(port); + return
pmc_usb_mux_safe_state(port, state); if (state->mode == TYPEC_STATE_USB) return pmc_usb_connect(port, port->role); From 85d2a8b287a89853c0dcfc5a97b5e9d36376fe37 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 24 Oct 2022 16:45:11 +0800 Subject: [PATCH 0886/1477] iio: adc: at91_adc: fix possible memory leak in at91_adc_allocate_trigger() commit 65f20301607d07ee279b0804d11a05a62a6c1a1c upstream. If iio_trigger_register() returns error, it should call iio_trigger_free() to give up the reference that hold in iio_trigger_alloc(), so that it can call iio_trig_release() to free memory when the refcount hit to 0. Fixes: 0e589d5fb317 ("ARM: AT91: IIO: Add AT91 ADC driver.") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221024084511.815096-1-yangyingliang@huawei.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/at91_adc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 0a793e7cd53e..38d4a910bc52 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -616,8 +616,10 @@ static struct iio_trigger *at91_adc_allocate_trigger(struct iio_dev *idev, trig->ops = &at91_adc_trigger_ops; ret = iio_trigger_register(trig); - if (ret) + if (ret) { + iio_trigger_free(trig); return NULL; + } return trig; } From 8dddf2699da296c84205582aaead6b43dd7e8c4b Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 22 Oct 2022 15:42:12 +0800 Subject: [PATCH 0887/1477] iio: trigger: sysfs: fix possible memory leak in iio_sysfs_trig_init() commit efa17e90e1711bdb084e3954fa44afb6647331c0 upstream. dev_set_name() allocates memory for name, it need be freed when device_add() fails, call put_device() to give up the reference that hold in device_initialize(), so that it can be freed in kobject_cleanup() when the refcount hit to 0. 
Fault injection test can trigger this: unreferenced object 0xffff8e8340a7b4c0 (size 32): comm "modprobe", pid 243, jiffies 4294678145 (age 48.845s) hex dump (first 32 bytes): 69 69 6f 5f 73 79 73 66 73 5f 74 72 69 67 67 65 iio_sysfs_trigge 72 00 a7 40 83 8e ff ff 00 86 13 c4 f6 ee ff ff r..@............ backtrace: [<0000000074999de8>] __kmem_cache_alloc_node+0x1e9/0x360 [<00000000497fd30b>] __kmalloc_node_track_caller+0x44/0x1a0 [<000000003636c520>] kstrdup+0x2d/0x60 [<0000000032f84da2>] kobject_set_name_vargs+0x1e/0x90 [<0000000092efe493>] dev_set_name+0x4e/0x70 Fixes: 1f785681a870 ("staging:iio:trigger sysfs userspace trigger rework.") Signed-off-by: Yang Yingliang Cc: Link: https://lore.kernel.org/r/20221022074212.1386424-1-yangyingliang@huawei.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/trigger/iio-trig-sysfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c index 2277d6336ac0..9ed5b9405ade 100644 --- a/drivers/iio/trigger/iio-trig-sysfs.c +++ b/drivers/iio/trigger/iio-trig-sysfs.c @@ -209,9 +209,13 @@ static int iio_sysfs_trigger_remove(int id) static int __init iio_sysfs_trig_init(void) { + int ret; device_initialize(&iio_sysfs_trig_dev); dev_set_name(&iio_sysfs_trig_dev, "iio_sysfs_trigger"); - return device_add(&iio_sysfs_trig_dev); + ret = device_add(&iio_sysfs_trig_dev); + if (ret) + put_device(&iio_sysfs_trig_dev); + return ret; } module_init(iio_sysfs_trig_init); From 46b8bc62c5ea20416a7dd9778624924304922a51 Mon Sep 17 00:00:00 2001 From: Saravanan Sekar Date: Sat, 29 Oct 2022 11:29:53 +0200 Subject: [PATCH 0888/1477] iio: adc: mp2629: fix wrong comparison of channel commit 1eb20332a082fa801fb89c347c5e62de916a4001 upstream. Input voltage channel enum is compared against iio address instead of the channel. 
Fixes: 7abd9fb64682 ("iio: adc: mp2629: Add support for mp2629 ADC driver") Signed-off-by: Saravanan Sekar Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221029093000.45451-2-sravanhome@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/mp2629_adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/mp2629_adc.c b/drivers/iio/adc/mp2629_adc.c index 331a9a728217..a9695e1bc172 100644 --- a/drivers/iio/adc/mp2629_adc.c +++ b/drivers/iio/adc/mp2629_adc.c @@ -73,7 +73,7 @@ static int mp2629_read_raw(struct iio_dev *indio_dev, if (ret) return ret; - if (chan->address == MP2629_INPUT_VOLT) + if (chan->channel == MP2629_INPUT_VOLT) rval &= GENMASK(6, 0); *val = rval; return IIO_VAL_INT; From d95b85c5084ad70011988861ee864529eefa1da0 Mon Sep 17 00:00:00 2001 From: Saravanan Sekar Date: Sat, 29 Oct 2022 11:29:55 +0200 Subject: [PATCH 0889/1477] iio: adc: mp2629: fix potential array out of bound access commit ca1547ab15f48dc81624183ae17a2fd1bad06dfc upstream. Add sentinel at end of maps to avoid potential array out of bound access in iio core. 
Fixes: 7abd9fb64682 ("iio: adc: mp2629: Add support for mp2629 ADC driver") Signed-off-by: Saravanan Sekar Link: https://lore.kernel.org/r/20221029093000.45451-4-sravanhome@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/mp2629_adc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/mp2629_adc.c b/drivers/iio/adc/mp2629_adc.c index a9695e1bc172..acd9420c0416 100644 --- a/drivers/iio/adc/mp2629_adc.c +++ b/drivers/iio/adc/mp2629_adc.c @@ -56,7 +56,8 @@ static struct iio_map mp2629_adc_maps[] = { MP2629_MAP(SYSTEM_VOLT, "system-volt"), MP2629_MAP(INPUT_VOLT, "input-volt"), MP2629_MAP(BATT_CURRENT, "batt-current"), - MP2629_MAP(INPUT_CURRENT, "input-current") + MP2629_MAP(INPUT_CURRENT, "input-current"), + { } }; static int mp2629_read_raw(struct iio_dev *indio_dev, From 67a75a9480fc4f73465a90e6a5e5ee12e9de4d39 Mon Sep 17 00:00:00 2001 From: Mitja Spes Date: Fri, 21 Oct 2022 15:58:21 +0200 Subject: [PATCH 0890/1477] iio: pressure: ms5611: changed hardcoded SPI speed to value limited commit 741cec30cc52058d1c10d415f3b98319887e4f73 upstream. Don't hardcode the ms5611 SPI speed, limit it instead. 
Signed-off-by: Mitja Spes Fixes: c0644160a8b5 ("iio: pressure: add support for MS5611 pressure and temperature sensor") Link: https://lore.kernel.org/r/20221021135827.1444793-3-mitja@lxnav.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/pressure/ms5611_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c index 45d3a7d5be8e..f7743ee3318f 100644 --- a/drivers/iio/pressure/ms5611_spi.c +++ b/drivers/iio/pressure/ms5611_spi.c @@ -94,7 +94,7 @@ static int ms5611_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, indio_dev); spi->mode = SPI_MODE_0; - spi->max_speed_hz = 20000000; + spi->max_speed_hz = min(spi->max_speed_hz, 20000000U); spi->bits_per_word = 8; ret = spi_setup(spi); if (ret < 0) From f59f5a269ca5e43c567aca7f1f52500a0186e9b7 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 1 Nov 2022 16:53:35 -0400 Subject: [PATCH 0891/1477] dm ioctl: fix misbehavior if list_versions races with module loading commit 4fe1ec995483737f3d2a14c3fe1d8fe634972979 upstream. __list_versions will first estimate the required space using the "dm_target_iterate(list_version_get_needed, &needed)" call and then will fill the space using the "dm_target_iterate(list_version_get_info, &iter_info)" call. Each of these calls locks the targets using the "down_read(&_lock)" and "up_read(&_lock)" calls, however between the first and second "dm_target_iterate" there is no lock held and the target modules can be loaded at this point, so the second "dm_target_iterate" call may need more space than what was the first "dm_target_iterate" returned. The code tries to handle this overflow (see the beginning of list_version_get_info), however this handling is incorrect. 
The code sets "param->data_size = param->data_start + needed" and "iter_info.end = (char *)vers+len" - "needed" is the size returned by the first dm_target_iterate call; "len" is the size of the buffer allocated by userspace. "len" may be greater than "needed"; in this case, the code will write up to "len" bytes into the buffer, however param->data_size is set to "needed", so it may write data past the param->data_size value. The ioctl interface copies only up to param->data_size into userspace, thus part of the result will be truncated. Fix this bug by setting "iter_info.end = (char *)vers + needed;" - this guarantees that the second "dm_target_iterate" call will write only up to the "needed" buffer and it will exit with "DM_BUFFER_FULL_FLAG" if it overflows the "needed" space - in this case, userspace will allocate a larger buffer and retry. Note that there is also a bug in list_version_get_needed - we need to add "strlen(tt->name) + 1" to the needed size, not "strlen(tt->name)". Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index b839705654d4..20171c9d8952 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -573,7 +573,7 @@ static void list_version_get_needed(struct target_type *tt, void *needed_param) size_t *needed = needed_param; *needed += sizeof(struct dm_target_versions); - *needed += strlen(tt->name); + *needed += strlen(tt->name) + 1; *needed += ALIGN_MASK; } @@ -638,7 +638,7 @@ static int __list_versions(struct dm_ioctl *param, size_t param_size, const char iter_info.old_vers = NULL; iter_info.vers = vers; iter_info.flags = 0; - iter_info.end = (char *)vers+len; + iter_info.end = (char *)vers + needed; /* * Now loop through filling out the names & versions. 
From a5eaad87bfca23b851a68f1f233ddd6f0bb25192 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 8 Nov 2022 14:19:49 +0200 Subject: [PATCH 0892/1477] serial: 8250: Fall back to non-DMA Rx if IIR_RDI occurs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a931237cbea256aff13bb403da13a97b2d1605d9 upstream. DW UART sometimes triggers IIR_RDI during DMA Rx when IIR_RX_TIMEOUT should have been triggered instead. Since IIR_RDI has higher priority than IIR_RX_TIMEOUT, this causes the Rx to hang into interrupt loop. The problem seems to occur at least with some combinations of small-sized transfers (I've reproduced the problem on Elkhart Lake PSE UARTs). If there's already an on-going Rx DMA and IIR_RDI triggers, fall graciously back to non-DMA Rx. That is, behave as if IIR_RX_TIMEOUT had occurred. 8250_omap already considers IIR_RDI similar to this change so its nothing unheard of. Fixes: 75df022b5f89 ("serial: 8250_dma: Fix RX handling") Cc: Co-developed-by: Srikanth Thokala Signed-off-by: Srikanth Thokala Co-developed-by: Aman Kumar Signed-off-by: Aman Kumar Signed-off-by: Ilpo Järvinen Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221108121952.5497-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 7cdfc2458d36..7a15bb14194c 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1869,6 +1869,10 @@ EXPORT_SYMBOL_GPL(serial8250_modem_status); static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) { switch (iir & 0x3f) { + case UART_IIR_RDI: + if (!up->dma->rx_running) + break; + fallthrough; case UART_IIR_RX_TIMEOUT: serial8250_rx_dma_flush(up); fallthrough; From 8679087e93574742d7cceb001d7a1bd558585395 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 8 Nov 2022 14:19:52 +0200 Subject: [PATCH 0893/1477] serial: 8250: Flush DMA Rx on RLSI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1980860e0c8299316cddaf0992dd9e1258ec9d88 upstream. Returning true from handle_rx_dma() without flushing DMA first creates a data ordering hazard. If DMA Rx has handled any character at the point when RLSI occurs, the non-DMA path handles any pending characters jumping them ahead of those characters that are pending under DMA. Fixes: 75df022b5f89 ("serial: 8250_dma: Fix RX handling") Cc: Signed-off-by: Ilpo Järvinen Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221108121952.5497-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 7a15bb14194c..1f231fcda657 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1873,10 +1873,9 @@ static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) if (!up->dma->rx_running) break; fallthrough; + case UART_IIR_RLSI: case UART_IIR_RX_TIMEOUT: serial8250_rx_dma_flush(up); - fallthrough; - case UART_IIR_RLSI: return true; } return up->dma->rx_dma(up); From 5f4611fe012ff7f2a7801db50b94867ffb858fc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 8 Nov 2022 14:19:50 +0200 Subject: [PATCH 0894/1477] serial: 8250_lpss: Configure DMA also w/o DMA filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1bfcbe5805d0cfc83c3544dcd01e0a282c1f6790 upstream. If the platform doesn't use DMA device filter (as is the case with Elkhart Lake), whole lpss8250_dma_setup() setup is skipped. This results in skipping also *_maxburst setup which is undesirable. 
Refactor lpss8250_dma_setup() to configure DMA even if filter is not setup. Cc: stable Signed-off-by: Ilpo Järvinen Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221108121952.5497-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_lpss.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c index dfb730b7ea2a..1349c161c192 100644 --- a/drivers/tty/serial/8250/8250_lpss.c +++ b/drivers/tty/serial/8250/8250_lpss.c @@ -268,8 +268,13 @@ static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port struct dw_dma_slave *rx_param, *tx_param; struct device *dev = port->port.dev; - if (!lpss->dma_param.dma_dev) + if (!lpss->dma_param.dma_dev) { + dma = port->dma; + if (dma) + goto out_configuration_only; + return 0; + } rx_param = devm_kzalloc(dev, sizeof(*rx_param), GFP_KERNEL); if (!rx_param) @@ -280,16 +285,18 @@ static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port return -ENOMEM; *rx_param = lpss->dma_param; - dma->rxconf.src_maxburst = lpss->dma_maxburst; - *tx_param = lpss->dma_param; - dma->txconf.dst_maxburst = lpss->dma_maxburst; dma->fn = lpss8250_dma_filter; dma->rx_param = rx_param; dma->tx_param = tx_param; port->dma = dma; + +out_configuration_only: + dma->rxconf.src_maxburst = lpss->dma_maxburst; + dma->txconf.dst_maxburst = lpss->dma_maxburst; + return 0; } From 24cc679abbf31477d0cc6106ec83c2fbae6b3cdf Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 7 Nov 2022 10:21:40 -0800 Subject: [PATCH 0895/1477] Input: iforce - invert valid length check when fetching device IDs commit b8ebf250997c5fb253582f42bfe98673801ebebd upstream. 
syzbot is reporting uninitialized value at iforce_init_device() [1], for commit 6ac0aec6b0a6 ("Input: iforce - allow callers supply data buffer when fetching device IDs") is checking that valid length is shorter than bytes to read. Since iforce_get_id_packet() stores valid length when returning 0, the caller needs to check that valid length is longer than or equals to bytes to read. Reported-by: syzbot Signed-off-by: Tetsuo Handa Fixes: 6ac0aec6b0a6 ("Input: iforce - allow callers supply data buffer when fetching device IDs") Link: https://lore.kernel.org/r/531fb432-7396-ad37-ecba-3e42e7f56d5c@I-love.SAKURA.ne.jp Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/iforce/iforce-main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c index b86de1312512..84b87526b7ba 100644 --- a/drivers/input/joystick/iforce/iforce-main.c +++ b/drivers/input/joystick/iforce/iforce-main.c @@ -273,22 +273,22 @@ int iforce_init_device(struct device *parent, u16 bustype, * Get device info. 
*/ - if (!iforce_get_id_packet(iforce, 'M', buf, &len) || len < 3) + if (!iforce_get_id_packet(iforce, 'M', buf, &len) && len >= 3) input_dev->id.vendor = get_unaligned_le16(buf + 1); else dev_warn(&iforce->dev->dev, "Device does not respond to id packet M\n"); - if (!iforce_get_id_packet(iforce, 'P', buf, &len) || len < 3) + if (!iforce_get_id_packet(iforce, 'P', buf, &len) && len >= 3) input_dev->id.product = get_unaligned_le16(buf + 1); else dev_warn(&iforce->dev->dev, "Device does not respond to id packet P\n"); - if (!iforce_get_id_packet(iforce, 'B', buf, &len) || len < 3) + if (!iforce_get_id_packet(iforce, 'B', buf, &len) && len >= 3) iforce->device_memory.end = get_unaligned_le16(buf + 1); else dev_warn(&iforce->dev->dev, "Device does not respond to id packet B\n"); - if (!iforce_get_id_packet(iforce, 'N', buf, &len) || len < 2) + if (!iforce_get_id_packet(iforce, 'N', buf, &len) && len >= 2) ff_effects = buf[1]; else dev_warn(&iforce->dev->dev, "Device does not respond to id packet N\n"); From db744288af730abb66312f40b087d1dbf794c5f4 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Thu, 10 Nov 2022 09:56:13 +0100 Subject: [PATCH 0896/1477] maccess: Fix writing offset in case of fault in strncpy_from_kernel_nofault() commit 8678ea06852cd1f819b870c773d43df888d15d46 upstream. If a page fault occurs while copying the first byte, this function resets one byte before dst. As a consequence, an address could be modified and lead to kernel crashes in case the modified address was accessed later.
Fixes: b58294ead14c ("maccess: allow architectures to provide kernel probing directly") Signed-off-by: Alban Crequy Signed-off-by: Andrii Nakryiko Tested-by: Francis Laniel Reviewed-by: Andrew Morton Cc: [5.8] Link: https://lore.kernel.org/bpf/20221110085614.111213-2-albancrequy@linux.microsoft.com Signed-off-by: Greg Kroah-Hartman --- mm/maccess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/maccess.c b/mm/maccess.c index 3bd70405f2d8..f6ea117a69eb 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -83,7 +83,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) return src - unsafe_addr; Efault: pagefault_enable(); - dst[-1] = '\0'; + dst[0] = '\0'; return -EFAULT; } #else /* HAVE_GET_KERNEL_NOFAULT */ From d2c7d8f58e9cde8ac8d1f75e9d66c2a813ffe0ab Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Wed, 16 Nov 2022 11:50:37 +0100 Subject: [PATCH 0897/1477] scsi: zfcp: Fix double free of FSF request when qdio send fails commit 0954256e970ecf371b03a6c9af2cf91b9c4085ff upstream. We used to use the wrong type of integer in 'zfcp_fsf_req_send()' to cache the FSF request ID when sending a new FSF request. This is used in case the sending fails and we need to remove the request from our internal hash table again (so we don't keep an invalid reference and use it when we free the request again). In 'zfcp_fsf_req_send()' we used to cache the ID as 'int' (signed and 32 bit wide), but the rest of the zfcp code (and the firmware specification) handles the ID as 'unsigned long'/'u64' (unsigned and 64 bit wide [s390x ELF ABI]). For one this has the obvious problem that when the ID grows past 32 bit (this can happen reasonably fast) it is truncated to 32 bit when storing it in the cache variable and so doesn't match the original ID anymore. 
The second less obvious problem is that even when the original ID has not yet grown past 32 bit, as soon as the 32nd bit is set in the original ID (0x80000000 = 2'147'483'648) we will have a mismatch when we cast it back to 'unsigned long'. As the cached variable is of a signed type, the compiler will choose a sign-extending instruction to load the 32 bit variable into a 64 bit register (e.g.: 'lgf %r11,188(%r15)'). So once we pass the cached variable into 'zfcp_reqlist_find_rm()' to remove the request again all the leading zeros will be flipped to ones to extend the sign and won't match the original ID anymore (this has been observed in practice). If we can't successfully remove the request from the hash table again after 'zfcp_qdio_send()' fails (this happens regularly when zfcp cannot notify the adapter about new work because the adapter is already gone during e.g. a ChpID toggle) we will end up with a double free. We unconditionally free the request in the calling function when 'zfcp_fsf_req_send()' fails, but because the request is still in the hash table we end up with a stale memory reference, and once the zfcp adapter is either reset during recovery or shutdown we end up freeing the same memory twice. The resulting stack traces vary depending on the kernel and have no direct correlation to the place where the bug occurs. Here are three examples that have been seen in practice: list_del corruption. next->prev should be 00000001b9d13800, but was 00000000dead4ead. (next=00000001bd131a00) ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:62! monitor event: 0040 ilc:2 [#1] PREEMPT SMP Modules linked in: ... CPU: 9 PID: 1617 Comm: zfcperp0.0.1740 Kdump: loaded Hardware name: ... 
Krnl PSW : 0704d00180000000 00000003cbeea1f8 (__list_del_entry_valid+0x98/0x140) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 00000000916d12f1 0000000080000000 000000000000006d 00000003cb665cd6 0000000000000001 0000000000000000 0000000000000000 00000000d28d21e8 00000000d3844000 00000380099efd28 00000001bd131a00 00000001b9d13800 00000000d3290100 0000000000000000 00000003cbeea1f4 00000380099efc70 Krnl Code: 00000003cbeea1e8: c020004f68a7 larl %r2,00000003cc8d7336 00000003cbeea1ee: c0e50027fd65 brasl %r14,00000003cc3e9cb8 #00000003cbeea1f4: af000000 mc 0,0 >00000003cbeea1f8: c02000920440 larl %r2,00000003cd12aa78 00000003cbeea1fe: c0e500289c25 brasl %r14,00000003cc3fda48 00000003cbeea204: b9040043 lgr %r4,%r3 00000003cbeea208: b9040051 lgr %r5,%r1 00000003cbeea20c: b9040032 lgr %r3,%r2 Call Trace: [<00000003cbeea1f8>] __list_del_entry_valid+0x98/0x140 ([<00000003cbeea1f4>] __list_del_entry_valid+0x94/0x140) [<000003ff7ff502fe>] zfcp_fsf_req_dismiss_all+0xde/0x150 [zfcp] [<000003ff7ff49cd0>] zfcp_erp_strategy_do_action+0x160/0x280 [zfcp] [<000003ff7ff4a22e>] zfcp_erp_strategy+0x21e/0xca0 [zfcp] [<000003ff7ff4ad34>] zfcp_erp_thread+0x84/0x1a0 [zfcp] [<00000003cb5eece8>] kthread+0x138/0x150 [<00000003cb557f3c>] __ret_from_fork+0x3c/0x60 [<00000003cc4172ea>] ret_from_fork+0xa/0x40 INFO: lockdep is turned off. Last Breaking-Event-Address: [<00000003cc3e9d04>] _printk+0x4c/0x58 Kernel panic - not syncing: Fatal exception: panic_on_oops or: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6803 Fault in home space mode while using kernel ASCE. AS:0000000063b10007 R3:0000000000000024 Oops: 0038 ilc:3 [#1] SMP Modules linked in: ... CPU: 10 PID: 0 Comm: swapper/10 Kdump: loaded Hardware name: ... 
Krnl PSW : 0404d00180000000 000003ff7febaf8e (zfcp_fsf_reqid_check+0x86/0x158 [zfcp]) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 5a6f1cfa89c49ac3 00000000aff2c4c8 6b6b6b6b6b6b6b6b 00000000000002a8 0000000000000000 0000000000000055 0000000000000000 00000000a8515800 0700000000000000 00000000a6e14500 00000000aff2c000 000000008003c44c 000000008093c700 0000000000000010 00000380009ebba8 00000380009ebb48 Krnl Code: 000003ff7febaf7e: a7f4003d brc 15,000003ff7febaff8 000003ff7febaf82: e32020000004 lg %r2,0(%r2) #000003ff7febaf88: ec2100388064 cgrj %r2,%r1,8,000003ff7febaff8 >000003ff7febaf8e: e3b020100020 cg %r11,16(%r2) 000003ff7febaf94: a774fff7 brc 7,000003ff7febaf82 000003ff7febaf98: ec280030007c cgij %r2,0,8,000003ff7febaff8 000003ff7febaf9e: e31020080004 lg %r1,8(%r2) 000003ff7febafa4: e33020000004 lg %r3,0(%r2) Call Trace: [<000003ff7febaf8e>] zfcp_fsf_reqid_check+0x86/0x158 [zfcp] [<000003ff7febbdbc>] zfcp_qdio_int_resp+0x6c/0x170 [zfcp] [<000003ff7febbf90>] zfcp_qdio_irq_tasklet+0xd0/0x108 [zfcp] [<0000000061d90a04>] tasklet_action_common.constprop.0+0xdc/0x128 [<000000006292f300>] __do_softirq+0x130/0x3c0 [<0000000061d906c6>] irq_exit_rcu+0xfe/0x118 [<000000006291e818>] do_io_irq+0xc8/0x168 [<000000006292d516>] io_int_handler+0xd6/0x110 [<000000006292d596>] psw_idle_exit+0x0/0xa ([<0000000061d3be50>] arch_cpu_idle+0x40/0xd0) [<000000006292ceea>] default_idle_call+0x52/0xf8 [<0000000061de4fa4>] do_idle+0xd4/0x168 [<0000000061de51fe>] cpu_startup_entry+0x36/0x40 [<0000000061d4faac>] smp_start_secondary+0x12c/0x138 [<000000006292d88e>] restart_int_handler+0x6e/0x90 Last Breaking-Event-Address: [<000003ff7febaf94>] zfcp_fsf_reqid_check+0x8c/0x158 [zfcp] Kernel panic - not syncing: Fatal exception in interrupt or: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 523b05d3ae76a000 TEID: 523b05d3ae76a803 Fault in home space mode while using kernel ASCE. 
AS:0000000077c40007 R3:0000000000000024 Oops: 0038 ilc:3 [#1] SMP Modules linked in: ... CPU: 3 PID: 453 Comm: kworker/3:1H Kdump: loaded Hardware name: ... Workqueue: kblockd blk_mq_run_work_fn Krnl PSW : 0404d00180000000 0000000076fc0312 (__kmalloc+0xd2/0x398) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: ffffffffffffffff 523b05d3ae76abf6 0000000000000000 0000000000092a20 0000000000000002 00000007e49b5cc0 00000007eda8f000 0000000000092a20 00000007eda8f000 00000003b02856b9 00000000000000a8 523b05d3ae76abf6 00000007dd662000 00000007eda8f000 0000000076fc02b2 000003e0037637a0 Krnl Code: 0000000076fc0302: c004000000d4 brcl 0,76fc04aa 0000000076fc0308: b904001b lgr %r1,%r11 #0000000076fc030c: e3106020001a algf %r1,32(%r6) >0000000076fc0312: e31010000082 xg %r1,0(%r1) 0000000076fc0318: b9040001 lgr %r0,%r1 0000000076fc031c: e30061700082 xg %r0,368(%r6) 0000000076fc0322: ec59000100d9 aghik %r5,%r9,1 0000000076fc0328: e34003b80004 lg %r4,952 Call Trace: [<0000000076fc0312>] __kmalloc+0xd2/0x398 [<0000000076f318f2>] mempool_alloc+0x72/0x1f8 [<000003ff8027c5f8>] zfcp_fsf_req_create.isra.7+0x40/0x268 [zfcp] [<000003ff8027f1bc>] zfcp_fsf_fcp_cmnd+0xac/0x3f0 [zfcp] [<000003ff80280f1a>] zfcp_scsi_queuecommand+0x122/0x1d0 [zfcp] [<000003ff800b4218>] scsi_queue_rq+0x778/0xa10 [scsi_mod] [<00000000771782a0>] __blk_mq_try_issue_directly+0x130/0x208 [<000000007717a124>] blk_mq_request_issue_directly+0x4c/0xa8 [<000003ff801302e2>] dm_mq_queue_rq+0x2ea/0x468 [dm_mod] [<0000000077178c12>] blk_mq_dispatch_rq_list+0x33a/0x818 [<000000007717f064>] __blk_mq_do_dispatch_sched+0x284/0x2f0 [<000000007717f44c>] __blk_mq_sched_dispatch_requests+0x1c4/0x218 [<000000007717fa7a>] blk_mq_sched_dispatch_requests+0x52/0x90 [<0000000077176d74>] __blk_mq_run_hw_queue+0x9c/0xc0 [<0000000076da6d74>] process_one_work+0x274/0x4d0 [<0000000076da7018>] worker_thread+0x48/0x560 [<0000000076daef18>] kthread+0x140/0x160 [<000000007751d144>] ret_from_fork+0x28/0x30 Last 
Breaking-Event-Address: [<0000000076fc0474>] __kmalloc+0x234/0x398 Kernel panic - not syncing: Fatal exception: panic_on_oops To fix this, simply change the type of the cache variable to 'unsigned long', like the rest of zfcp and also the argument for 'zfcp_reqlist_find_rm()'. This prevents truncation and wrong sign extension and so can successfully remove the request from the hash table. Fixes: e60a6d69f1f8 ("[SCSI] zfcp: Remove function zfcp_reqlist_find_safe") Cc: #v2.6.34+ Signed-off-by: Benjamin Block Link: https://lore.kernel.org/r/979f6e6019d15f91ba56182f1aaf68d61bf37fc6.1668595505.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_fsf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 8401c42db541..524947bf21b9 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -866,7 +866,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req); struct zfcp_adapter *adapter = req->adapter; struct zfcp_qdio *qdio = adapter->qdio; - int req_id = req->req_id; + unsigned long req_id = req->req_id; zfcp_reqlist_add(adapter->req_list, req); From deda86a0d84d7cf83cee0b3932bfbbb8c0d7b401 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Wed, 16 Nov 2022 13:15:44 +0800 Subject: [PATCH 0898/1477] iommu/vt-d: Set SRE bit only when hardware has SRS cap commit 7fc961cf7ffcb130c4e93ee9a5628134f9de700a upstream. SRS cap is the hardware cap telling if the hardware IOMMU can support requests seeking supervisor privilege or not. SRE bit in scalable-mode PASID table entry is treated as Reserved(0) for implementation not supporting SRS cap. Checking SRS cap before setting SRE bit can avoid the non-recoverable fault of "Non-zero reserved field set in PASID Table Entry" caused by setting SRE bit while there is no SRS cap support. 
The fault messages look like below: DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Read NO_PASID] Request device [00:0d.0] fault addr 0x1154e1000 [fault reason 0x5a] SM: Non-zero reserved field set in PASID Table Entry Fixes: 6f7db75e1c46 ("iommu/vt-d: Add second level page table interface") Cc: stable@vger.kernel.org Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20221115070346.1112273-1-tina.zhang@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20221116051544.26540-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel/pasid.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index fb911b6c418f..86fd49ae7f61 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -669,7 +669,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, * Since it is a second level only translation setup, we should * set SRE bit as well (addresses are expected to be GPAs). */ - if (pasid != PASID_RID2PASID) + if (pasid != PASID_RID2PASID && ecap_srs(iommu->ecap)) pasid_set_sre(pte); pasid_set_present(pte); pasid_flush_caches(iommu, pte, pasid, did); @@ -704,7 +704,8 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, * We should set SRE bit as well since the addresses are expected * to be GPAs. */ - pasid_set_sre(pte); + if (ecap_srs(iommu->ecap)) + pasid_set_sre(pte); pasid_set_present(pte); pasid_flush_caches(iommu, pte, pasid, did); From 05b0f6624dda6c9106023aca6779c56908599d69 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 19 Oct 2022 18:10:53 -0700 Subject: [PATCH 0899/1477] firmware: coreboot: Register bus in module init commit 65946690ed8d972fdb91a74ee75ac0f0f0d68321 upstream. The coreboot_table driver registers a coreboot bus while probing a "coreboot_table" device representing the coreboot table memory region. 
Probing this device (i.e., registering the bus) is a dependency for the module_init() functions of any driver for this bus (e.g., memconsole-coreboot.c / memconsole_driver_init()). With synchronous probe, this dependency works OK, as the link order in the Makefile ensures coreboot_table_driver_init() (and thus, coreboot_table_probe()) completes before a coreboot device driver tries to add itself to the bus. With asynchronous probe, however, coreboot_table_probe() may race with memconsole_driver_init(), and so we're liable to hit one of these two: 1. coreboot_driver_register() eventually hits "[...] the bus was not initialized.", and the memconsole driver fails to register; or 2. coreboot_driver_register() gets past #1, but still races with bus_register() and hits some other undefined/crashing behavior (e.g., in driver_find() [1]) We can resolve this by registering the bus in our initcall, and only deferring "device" work (scanning the coreboot memory region and creating sub-devices) to probe(). [1] Example failure, using 'driver_async_probe=*' kernel command line: [ 0.114217] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 ... [ 0.114307] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc1 #63 [ 0.114316] Hardware name: Google Scarlet (DT) ... 
[ 0.114488] Call trace: [ 0.114494] _raw_spin_lock+0x34/0x60 [ 0.114502] kset_find_obj+0x28/0x84 [ 0.114511] driver_find+0x30/0x50 [ 0.114520] driver_register+0x64/0x10c [ 0.114528] coreboot_driver_register+0x30/0x3c [ 0.114540] memconsole_driver_init+0x24/0x30 [ 0.114550] do_one_initcall+0x154/0x2e0 [ 0.114560] do_initcall_level+0x134/0x160 [ 0.114571] do_initcalls+0x60/0xa0 [ 0.114579] do_basic_setup+0x28/0x34 [ 0.114588] kernel_init_freeable+0xf8/0x150 [ 0.114596] kernel_init+0x2c/0x12c [ 0.114607] ret_from_fork+0x10/0x20 [ 0.114624] Code: 5280002b 1100054a b900092a f9800011 (885ffc01) [ 0.114631] ---[ end trace 0000000000000000 ]--- Fixes: b81e3140e412 ("firmware: coreboot: Make bus registration symmetric") Cc: Signed-off-by: Brian Norris Reviewed-by: Guenter Roeck Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20221019180934.1.If29e167d8a4771b0bf4a39c89c6946ed764817b9@changeid Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/coreboot_table.c | 37 +++++++++++++++++++----- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c index 0205987a4fd4..568074148f62 100644 --- a/drivers/firmware/google/coreboot_table.c +++ b/drivers/firmware/google/coreboot_table.c @@ -152,12 +152,8 @@ static int coreboot_table_probe(struct platform_device *pdev) if (!ptr) return -ENOMEM; - ret = bus_register(&coreboot_bus_type); - if (!ret) { - ret = coreboot_table_populate(dev, ptr); - if (ret) - bus_unregister(&coreboot_bus_type); - } + ret = coreboot_table_populate(dev, ptr); + memunmap(ptr); return ret; @@ -172,7 +168,6 @@ static int __cb_dev_unregister(struct device *dev, void *dummy) static int coreboot_table_remove(struct platform_device *pdev) { bus_for_each_dev(&coreboot_bus_type, NULL, NULL, __cb_dev_unregister); - bus_unregister(&coreboot_bus_type); return 0; } @@ -202,6 +197,32 @@ static struct platform_driver 
coreboot_table_driver = { .of_match_table = of_match_ptr(coreboot_of_match), }, }; -module_platform_driver(coreboot_table_driver); + +static int __init coreboot_table_driver_init(void) +{ + int ret; + + ret = bus_register(&coreboot_bus_type); + if (ret) + return ret; + + ret = platform_driver_register(&coreboot_table_driver); + if (ret) { + bus_unregister(&coreboot_bus_type); + return ret; + } + + return 0; +} + +static void __exit coreboot_table_driver_exit(void) +{ + platform_driver_unregister(&coreboot_table_driver); + bus_unregister(&coreboot_bus_type); +} + +module_init(coreboot_table_driver_init); +module_exit(coreboot_table_driver_exit); + MODULE_AUTHOR("Google, Inc."); MODULE_LICENSE("GPL"); From 8e70b141317826c5c1f377b082497a83bfe9094e Mon Sep 17 00:00:00 2001 From: Yann Gautier Date: Fri, 28 Oct 2022 09:37:40 +0200 Subject: [PATCH 0900/1477] mmc: core: properly select voltage range without power cycle commit 39a72dbfe188291b156dd6523511e3d5761ce775 upstream. In mmc_select_voltage(), if there is no full power cycle, the voltage range selected at the end of the function will be on a single range (e.g. 3.3V/3.4V). To keep a range around the selected voltage (3.2V/3.4V), the mask shift should be reduced by 1. This issue was triggered by using a specific SD-card (Verbatim Premium 16GB UHS-1) on an STM32MP157C-DK2 board. This board cannot do UHS modes and there is no power cycle. And the card was failing to switch to high-speed mode. When adding the range 3.2V/3.3V for this card with the proposed shift change, the card can switch to high-speed mode. 
Fixes: ce69d37b7d8f ("mmc: core: Prevent violation of specs while initializing cards") Signed-off-by: Yann Gautier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221028073740.7259-1-yann.gautier@foss.st.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index eb82f6aac951..7d9ec91e081b 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1128,7 +1128,13 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr) mmc_power_cycle(host, ocr); } else { bit = fls(ocr) - 1; - ocr &= 3 << bit; + /* + * The bit variable represents the highest voltage bit set in + * the OCR register. + * To keep a range of 2 values (e.g. 3.2V/3.3V and 3.3V/3.4V), + * we must shift the mask '3' with (bit - 1). + */ + ocr &= 3 << (bit - 1); if (bit != host->ios.vdd) dev_warn(mmc_dev(host), "exceeding card's volts\n"); } From 440653a180f53cbc57e1895c8c14ebb349cf5fd8 Mon Sep 17 00:00:00 2001 From: Chevron Li Date: Fri, 4 Nov 2022 02:55:12 -0700 Subject: [PATCH 0901/1477] mmc: sdhci-pci-o2micro: fix card detect fail issue caused by CD# debounce timeout commit 096cc0cddf58232bded309336961784f1d1c85f8 upstream. The SD card sometimes fails to be recognized when resuming from suspend. Because the CD# debounce time is too long, the card-present state is reported incorrectly and, as a result, card recognition fails.
Signed-off-by: Chevron Li Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221104095512.4068-1-chevron.li@bayhubtech.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-o2micro.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index 8c357e3b78d7..72234790a310 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -31,6 +31,7 @@ #define O2_SD_CAPS 0xE0 #define O2_SD_ADMA1 0xE2 #define O2_SD_ADMA2 0xE7 +#define O2_SD_MISC_CTRL2 0xF0 #define O2_SD_INF_MOD 0xF1 #define O2_SD_MISC_CTRL4 0xFC #define O2_SD_MISC_CTRL 0x1C0 @@ -822,6 +823,12 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) /* Set Tuning Windows to 5 */ pci_write_config_byte(chip->pdev, O2_SD_TUNING_CTRL, 0x55); + //Adjust 1st and 2nd CD debounce time + pci_read_config_dword(chip->pdev, O2_SD_MISC_CTRL2, &scratch_32); + scratch_32 &= 0xFFE7FFFF; + scratch_32 |= 0x00180000; + pci_write_config_dword(chip->pdev, O2_SD_MISC_CTRL2, scratch_32); + pci_write_config_dword(chip->pdev, O2_SD_DETECT_SETTING, 1); /* Lock WP */ ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); From 4423866d31a06a810db22062ed13389416a66b22 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Mon, 14 Nov 2022 16:31:00 +0800 Subject: [PATCH 0902/1477] mmc: sdhci-pci: Fix possible memory leak caused by missing pci_dev_put() commit 222cfa0118aa68687ace74aab8fdf77ce8fbd7e6 upstream. pci_get_device() will increase the reference count for the returned pci_dev. We need to use pci_dev_put() to decrease the reference count before amd_probe() returns. There is no problem for the 'smbus_dev == NULL' branch because pci_dev_put() can also handle the NULL input parameter case. 
Fixes: 659c9bc114a8 ("mmc: sdhci-pci: Build o2micro support in the same module") Signed-off-by: Xiongfeng Wang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221114083100.149200-1-wangxiongfeng2@huawei.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 7eb9a62ee074..8b02fe3916d1 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1799,6 +1799,8 @@ static int amd_probe(struct sdhci_pci_chip *chip) } } + pci_dev_put(smbus_dev); + if (gen == AMD_CHIPSET_BEFORE_ML || gen == AMD_CHIPSET_CZ) chip->quirks2 |= SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD; From c1eb46a65b09a66c93219ca778376cecc04333f4 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 11 Oct 2022 11:14:17 -0600 Subject: [PATCH 0903/1477] docs: update mediator contact information in CoC doc commit 5fddf8962b429b8303c4a654291ecb6e61a7d747 upstream. Update mediator contact information in CoC interpretation document. Cc: Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/20221011171417.34286-1-skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Documentation/process/code-of-conduct-interpretation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/code-of-conduct-interpretation.rst b/Documentation/process/code-of-conduct-interpretation.rst index 4f8a06b00f60..43da2cc2e3b9 100644 --- a/Documentation/process/code-of-conduct-interpretation.rst +++ b/Documentation/process/code-of-conduct-interpretation.rst @@ -51,7 +51,7 @@ the Technical Advisory Board (TAB) or other maintainers if you're uncertain how to handle situations that come up. It will not be considered a violation report unless you want it to be. 
If you are uncertain about approaching the TAB or any other maintainers, please -reach out to our conflict mediator, Joanna Lee . +reach out to our conflict mediator, Joanna Lee . In the end, "be kind to each other" is really what the end goal is for everybody. We know everyone is human and we all fail at times, but the From 62634b43d3c4e1bf62fd540196f7081bf0885c0a Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 4 Nov 2022 18:58:49 +0100 Subject: [PATCH 0904/1477] misc/vmw_vmci: fix an infoleak in vmci_host_do_receive_datagram() commit e5b0d06d9b10f5f43101bd6598b076c347f9295f upstream. `struct vmci_event_qp` allocated by qp_notify_peer() contains padding, which may carry uninitialized data to the userspace, as observed by KMSAN: BUG: KMSAN: kernel-infoleak in instrument_copy_to_user ./include/linux/instrumented.h:121 instrument_copy_to_user ./include/linux/instrumented.h:121 _copy_to_user+0x5f/0xb0 lib/usercopy.c:33 copy_to_user ./include/linux/uaccess.h:169 vmci_host_do_receive_datagram drivers/misc/vmw_vmci/vmci_host.c:431 vmci_host_unlocked_ioctl+0x33d/0x43d0 drivers/misc/vmw_vmci/vmci_host.c:925 vfs_ioctl fs/ioctl.c:51 ... Uninit was stored to memory at: kmemdup+0x74/0xb0 mm/util.c:131 dg_dispatch_as_host drivers/misc/vmw_vmci/vmci_datagram.c:271 vmci_datagram_dispatch+0x4f8/0xfc0 drivers/misc/vmw_vmci/vmci_datagram.c:339 qp_notify_peer+0x19a/0x290 drivers/misc/vmw_vmci/vmci_queue_pair.c:1479 qp_broker_attach drivers/misc/vmw_vmci/vmci_queue_pair.c:1662 qp_broker_alloc+0x2977/0x2f30 drivers/misc/vmw_vmci/vmci_queue_pair.c:1750 vmci_qp_broker_alloc+0x96/0xd0 drivers/misc/vmw_vmci/vmci_queue_pair.c:1940 vmci_host_do_alloc_queuepair drivers/misc/vmw_vmci/vmci_host.c:488 vmci_host_unlocked_ioctl+0x24fd/0x43d0 drivers/misc/vmw_vmci/vmci_host.c:927 ... 
Local variable ev created at: qp_notify_peer+0x54/0x290 drivers/misc/vmw_vmci/vmci_queue_pair.c:1456 qp_broker_attach drivers/misc/vmw_vmci/vmci_queue_pair.c:1662 qp_broker_alloc+0x2977/0x2f30 drivers/misc/vmw_vmci/vmci_queue_pair.c:1750 Bytes 28-31 of 48 are uninitialized Memory access of size 48 starts at ffff888035155e00 Data copied to user address 0000000020000100 Use memset() to prevent the infoleaks. Also speculatively fix qp_notify_peer_local(), which may suffer from the same problem. Reported-by: syzbot+39be4da489ed2493ba25@syzkaller.appspotmail.com Cc: stable Fixes: 06164d2b72aa ("VMCI: queue pairs implementation.") Signed-off-by: Alexander Potapenko Reviewed-by: Vishnu Dasa Link: https://lore.kernel.org/r/20221104175849.2782567-1-glider@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_queue_pair.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index a49782dd903c..d4d388f021cc 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -852,6 +852,7 @@ static int qp_notify_peer_local(bool attach, struct vmci_handle handle) u32 context_id = vmci_get_context_id(); struct vmci_event_qp ev; + memset(&ev, 0, sizeof(ev)); ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER); ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_CONTEXT_RESOURCE_ID); @@ -1465,6 +1466,7 @@ static int qp_notify_peer(bool attach, * kernel. */ + memset(&ev, 0, sizeof(ev)); ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER); ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_CONTEXT_RESOURCE_ID); From 5e2f14d77223ab7c0bae83f8f2ab3bde6a2bb028 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Sat, 12 Nov 2022 17:15:08 +0200 Subject: [PATCH 0905/1477] perf/x86/intel/pt: Fix sampling using single range output commit ce0d998be9274dd3a3d971cbeaa6fe28fd2c3062 upstream. 
Deal with errata TGL052, ADL037 and RPL017 "Trace May Contain Incorrect Data When Configured With Single Range Output Larger Than 4KB" by disabling single range output whenever larger than 4KB. Fixes: 670638477aed ("perf/x86/intel/pt: Opportunistically use single range output mode") Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221112151508.13768-1-adrian.hunter@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/pt.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 95234f46b0fb..d87421acddc3 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1247,6 +1247,15 @@ static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages) if (1 << order != nr_pages) goto out; + /* + * Some processors cannot always support single range for more than + * 4KB - refer errata TGL052, ADL037 and RPL017. Future processors might + * also be affected, so for now rather than trying to keep track of + * which ones, just disable it for all. + */ + if (nr_pages > 1) + goto out; + buf->single = true; buf->nr_pages = nr_pages; ret = 0; From b9a5ecf24180f36bab967b4b1dbb112a0fa37255 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 22 Sep 2022 07:54:06 -0700 Subject: [PATCH 0906/1477] nvme: restrict management ioctls to admin commit 23e085b2dead13b51fe86d27069895b740f749c0 upstream. The passthrough commands already have this restriction, but the other operations do not. Require the same capabilities for all users as all of these operations, which include resets and rescans, can be disruptive. 
Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Ovidiu Panait Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3f106771d15b..d9c78fe85cb3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3330,11 +3330,17 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; dev_warn(ctrl->device, "resetting controller\n"); return nvme_reset_ctrl_sync(ctrl); case NVME_IOCTL_SUBSYS_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; return nvme_reset_subsystem(ctrl); case NVME_IOCTL_RESCAN: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; nvme_queue_scan(ctrl); return 0; default: From 023435a095d22bcbbaeea7e3a8c534b5c57d0d82 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 22 Sep 2022 08:13:47 -0700 Subject: [PATCH 0907/1477] nvme: ensure subsystem reset is single threaded commit 1e866afd4bcdd01a70a5eddb4371158d3035ce03 upstream. The subsystem reset writes to a register, so we have to ensure the device state is capable of handling that otherwise the driver may access unmapped registers. Use the state machine to ensure the subsystem reset doesn't try to write registers on a device already undergoing this type of reset. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=214771 Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Ovidiu Panait Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/nvme.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index abae7ef2ac51..86336496c65c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -544,11 +544,23 @@ static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj) static inline void nvme_should_fail(struct request *req) {} #endif +bool nvme_wait_reset(struct nvme_ctrl *ctrl); +int nvme_try_sched_reset(struct nvme_ctrl *ctrl); + static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) { + int ret; + if (!ctrl->subsystem) return -ENOTTY; - return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (!nvme_wait_reset(ctrl)) + return -EBUSY; + + ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (ret) + return ret; + + return nvme_try_sched_reset(ctrl); } /* @@ -635,7 +647,6 @@ void nvme_cancel_tagset(struct nvme_ctrl *ctrl); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); -bool nvme_wait_reset(struct nvme_ctrl *ctrl); int nvme_disable_ctrl(struct nvme_ctrl *ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); @@ -688,7 +699,6 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); -int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, From 36b5095b07ac3256203694f062f096f5435854f7 Mon Sep 17 00:00:00 2001 From: "Gong, Sishuai" Date: Tue, 27 Apr 2021 
15:04:24 +0000 Subject: [PATCH 0908/1477] net: fix a concurrency bug in l2tp_tunnel_register() [ Upstream commit 69e16d01d1de4f1249869de342915f608feb55d5 ] l2tp_tunnel_register() registers a tunnel without fully initializing its attribute. This can allow another kernel thread running l2tp_xmit_core() to access the uninitialized data and then cause a kernel NULL pointer dereference error, as shown below. Thread 1 Thread 2 //l2tp_tunnel_register() list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); //pppol2tp_connect() tunnel = l2tp_tunnel_get(sock_net(sk), info.tunnel_id); // Fetch the new tunnel ... //l2tp_xmit_core() struct sock *sk = tunnel->sock; ... bh_lock_sock(sk); //Null pointer error happens tunnel->sock = sk; Fix this bug by initializing tunnel->sock before adding the tunnel into l2tp_tunnel_list. Reviewed-by: Cong Wang Signed-off-by: Sishuai Gong Reported-by: Sishuai Gong Signed-off-by: David S. Miller Stable-dep-of: b68777d54fac ("l2tp: Serialize access to sk_user_data with sk_callback_lock") Signed-off-by: Sasha Levin --- net/l2tp/l2tp_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 561b6d67ab8b..dc8987ed08ad 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1480,11 +1480,15 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, tunnel->l2tp_net = net; pn = l2tp_pernet(net); + sk = sock->sk; + sock_hold(sk); + tunnel->sock = sk; + spin_lock_bh(&pn->l2tp_tunnel_list_lock); list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) { if (tunnel_walk->tunnel_id == tunnel->tunnel_id) { spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - + sock_put(sk); ret = -EEXIST; goto err_sock; } @@ -1492,10 +1496,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - sk = sock->sk; - sock_hold(sk); - tunnel->sock = sk; - if (tunnel->encap 
== L2TP_ENCAPTYPE_UDP) { struct udp_tunnel_sock_cfg udp_cfg = { .sk_user_data = tunnel, From 8208c266fe279d91751224089557cf8e5bd628cc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 21 Oct 2022 12:30:13 -0400 Subject: [PATCH 0909/1477] ring-buffer: Include dropped pages in counting dirty patches [ Upstream commit 31029a8b2c7e656a0289194ef16415050ae4c4ac ] The function ring_buffer_nr_dirty_pages() was created to find out how many pages are filled in the ring buffer. There are two running counters. One is incremented whenever a new page is touched (pages_touched) and the other is whenever a page is read (pages_read). The dirty count is the number touched minus the number read. This is used to determine if a blocked task should be woken up if the percentage of the ring buffer it is waiting for is hit. The problem is that it does not take into account dropped pages (when the new writes overwrite pages that were not read). And then the dirty pages will always be greater than the percentage. This makes the "buffer_percent" file inaccurate, as the number of dirty pages ends up always being larger than the percentage, even when it's not, and this causes user space to be woken up more than it wants to be. Add a new counter to keep track of lost pages, and include that in the accounting of dirty pages so that it is actually accurate.
Link: https://lkml.kernel.org/r/20221021123013.55fb6055@gandalf.local.home Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9a1f82cafad4..49ebb8c66268 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -517,6 +517,7 @@ struct ring_buffer_per_cpu { local_t committing; local_t commits; local_t pages_touched; + local_t pages_lost; local_t pages_read; long last_pages_touch; size_t shortest_full; @@ -771,10 +772,18 @@ size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) { size_t read; + size_t lost; size_t cnt; read = local_read(&buffer->buffers[cpu]->pages_read); + lost = local_read(&buffer->buffers[cpu]->pages_lost); cnt = local_read(&buffer->buffers[cpu]->pages_touched); + + if (WARN_ON_ONCE(cnt < lost)) + return 0; + + cnt -= lost; + /* The reader can read an empty page, but not more than that */ if (cnt < read) { WARN_ON_ONCE(read > cnt + 1); @@ -1897,6 +1906,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) */ local_add(page_entries, &cpu_buffer->overrun); local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_inc(&cpu_buffer->pages_lost); } /* @@ -2387,6 +2397,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, */ local_add(entries, &cpu_buffer->overrun); local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_inc(&cpu_buffer->pages_lost); /* * The entries will be zeroed out when we move the @@ -4981,6 +4992,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->committing, 0); local_set(&cpu_buffer->commits, 0); local_set(&cpu_buffer->pages_touched, 0); + local_set(&cpu_buffer->pages_lost, 0); 
local_set(&cpu_buffer->pages_read, 0); cpu_buffer->last_pages_touch = 0; cpu_buffer->shortest_full = 0; From 47c3bdd95505bc28c264ce1e78b985cdb05cc15f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 1 Jul 2022 22:47:51 +0200 Subject: [PATCH 0910/1477] usbnet: smsc95xx: Fix deadlock on runtime resume [ Upstream commit 7b960c967f2aa01ab8f45c5a0bd78e754cffdeee ] Commit 05b35e7eb9a1 ("smsc95xx: add phylib support") amended smsc95xx_resume() to call phy_init_hw(). That function waits for the device to runtime resume even though it is placed in the runtime resume path, causing a deadlock. The problem is that phy_init_hw() calls down to smsc95xx_mdiobus_read(), which never uses the _nopm variant of usbnet_read_cmd(). Commit b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with reset operation") causes a similar deadlock on resume if the device was already runtime suspended when entering system sleep: That's because the commit introduced smsc95xx_reset_resume(), which calls down to smsc95xx_reset(), which neglects to use _nopm accessors. Fix by auto-detecting whether a device access is performed by the suspend/resume task_struct and use the _nopm variant if so. This works because the PM core guarantees that suspend/resume callbacks are run in task context. Stacktrace for posterity: INFO: task kworker/2:1:49 blocked for more than 122 seconds. 
Workqueue: usb_hub_wq hub_event schedule rpm_resume __pm_runtime_resume usb_autopm_get_interface usbnet_read_cmd __smsc95xx_read_reg __smsc95xx_phy_wait_not_busy __smsc95xx_mdio_read smsc95xx_mdiobus_read __mdiobus_read mdiobus_read smsc_phy_reset phy_init_hw smsc95xx_resume usb_resume_interface usb_resume_both usb_runtime_resume __rpm_callback rpm_callback rpm_resume __pm_runtime_resume usb_autoresume_device hub_event process_one_work Fixes: b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with reset operation") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v3.16+ Cc: Andre Edich Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/smsc95xx.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 65d42f5d42a3..e1cd4c2de2d3 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -61,6 +61,7 @@ struct smsc95xx_priv { u8 suspend_flags; struct mii_bus *mdiobus; struct phy_device *phydev; + struct task_struct *pm_task; }; static bool turbo_mode = true; @@ -70,13 +71,14 @@ MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction"); static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index, u32 *data, int in_pm) { + struct smsc95xx_priv *pdata = dev->driver_priv; u32 buf; int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); BUG_ON(!dev); - if (!in_pm) + if (current != pdata->pm_task) fn = usbnet_read_cmd; else fn = usbnet_read_cmd_nopm; @@ -100,13 +102,14 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index, static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index, u32 data, int in_pm) { + struct smsc95xx_priv *pdata = dev->driver_priv; u32 buf; int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); BUG_ON(!dev); - if (!in_pm) + if (current != pdata->pm_task) fn = usbnet_write_cmd; else fn = 
usbnet_write_cmd_nopm; @@ -1468,9 +1471,12 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message) u32 val, link_up; int ret; + pdata->pm_task = current; + ret = usbnet_suspend(intf, message); if (ret < 0) { netdev_warn(dev->net, "usbnet_suspend error\n"); + pdata->pm_task = NULL; return ret; } @@ -1717,6 +1723,7 @@ done: if (ret && PMSG_IS_AUTO(message)) usbnet_resume(intf); + pdata->pm_task = NULL; return ret; } @@ -1737,29 +1744,31 @@ static int smsc95xx_resume(struct usb_interface *intf) /* do this first to ensure it's cleared even in error case */ pdata->suspend_flags = 0; + pdata->pm_task = current; + if (suspend_flags & SUSPEND_ALLMODES) { /* clear wake-up sources */ ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val); if (ret < 0) - return ret; + goto done; val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_); ret = smsc95xx_write_reg_nopm(dev, WUCSR, val); if (ret < 0) - return ret; + goto done; /* clear wake-up status */ ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val); if (ret < 0) - return ret; + goto done; val &= ~PM_CTL_WOL_EN_; val |= PM_CTL_WUPS_; ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val); if (ret < 0) - return ret; + goto done; } ret = usbnet_resume(intf); @@ -1767,15 +1776,21 @@ static int smsc95xx_resume(struct usb_interface *intf) netdev_warn(dev->net, "usbnet_resume error\n"); phy_init_hw(pdata->phydev); + +done: + pdata->pm_task = NULL; return ret; } static int smsc95xx_reset_resume(struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); + struct smsc95xx_priv *pdata = dev->driver_priv; int ret; + pdata->pm_task = current; ret = smsc95xx_reset(dev); + pdata->pm_task = NULL; if (ret < 0) return ret; From 9fd7bdaffe0e89833f4b1c1d3abd43023e951ec1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 17 May 2021 20:01:15 -0700 Subject: [PATCH 0911/1477] stddef: Introduce struct_group() helper macro [ Upstream commit 50d7bd38c3aafc4749e05e8d7fcb616979143602 ] Kernel code has a regular need to describe groups of members 
within a structure usually when they need to be copied or initialized separately from the rest of the surrounding structure. The generally accepted design pattern in C is to use a named sub-struct: struct foo { int one; struct { int two; int three, four; } thing; int five; }; This would allow for traditional references and sizing: memcpy(&dst.thing, &src.thing, sizeof(dst.thing)); However, doing this would mean that referencing struct members enclosed by such named structs would always require including the sub-struct name in identifiers: do_something(dst.thing.three); This has tended to be quite inflexible, especially when such groupings need to be added to established code which causes huge naming churn. Three workarounds exist in the kernel for this problem, and each has other negative properties. To avoid the naming churn, there is a design pattern of adding macro aliases for the named struct: #define f_three thing.three This ends up polluting the global namespace, and makes it difficult to search for identifiers. Another common work-around in kernel code avoids the pollution by avoiding the named struct entirely, instead identifying the group's boundaries using either a pair of empty anonymous structs or a pair of zero-element arrays: struct foo { int one; struct { } start; int two; int three, four; struct { } finish; int five; }; struct foo { int one; int start[0]; int two; int three, four; int finish[0]; int five; }; This allows code to avoid needing to use a sub-struct named for member references within the surrounding structure, but loses the benefits of being able to actually use such a struct, making it rather fragile. Using these requires open-coded calculation of sizes and offsets. The efforts made to avoid common mistakes include lots of comments, or adding various BUILD_BUG_ON()s. Such code is left with no way for the compiler to reason about the boundaries (e.g.
the "start" object looks like it's 0 bytes in length), making bounds checking depend on open-coded calculations: if (length > offsetof(struct foo, finish) - offsetof(struct foo, start)) return -EINVAL; memcpy(&dst.start, &src.start, offsetof(struct foo, finish) - offsetof(struct foo, start)); However, the vast majority of places in the kernel that operate on groups of members do so without any identification of the grouping, relying either on comments or implicit knowledge of the struct contents, which is even harder for the compiler to reason about, and results in even more fragile manual sizing, usually depending on member locations outside of the region (e.g. to copy "two" and "three", use the start of "four" to find the size): BUILD_BUG_ON((offsetof(struct foo, four) < offsetof(struct foo, two)) || (offsetof(struct foo, four) < offsetof(struct foo, three))); if (length > offsetof(struct foo, four) - offsetof(struct foo, two)) return -EINVAL; memcpy(&dst.two, &src.two, length); In order to have a regular programmatic way to describe a struct region that can be used for references and sizing, can be examined for bounds checking, avoids forcing the use of intermediate identifiers, and avoids polluting the global namespace, introduce the struct_group() macro. This macro wraps the member declarations to create an anonymous union of an anonymous struct (no intermediate name) and a named struct (for references and sizing): struct foo { int one; struct_group(thing, int two; int three, four; ); int five; }; if (length > sizeof(src.thing)) return -EINVAL; memcpy(&dst.thing, &src.thing, length); do_something(dst.three); There are some rare cases where the resulting struct_group() needs attributes added, so struct_group_attr() is also introduced to allow for specifying struct attributes (e.g. __align(x) or __packed). Additionally, there are places where such declarations would like to have the struct be tagged, so struct_group_tagged() is added.
Given there is a need for a handful of UAPI uses too, the underlying __struct_group() macro has been defined in UAPI so it can be used there too. To avoid confusing scripts/kernel-doc, hide the macro from its struct parsing. Co-developed-by: Keith Packard Signed-off-by: Keith Packard Acked-by: Gustavo A. R. Silva Link: https://lore.kernel.org/lkml/20210728023217.GC35706@embeddedor Enhanced-by: Rasmus Villemoes Link: https://lore.kernel.org/lkml/41183a98-bdb9-4ad6-7eab-5a7292a6df84@rasmusvillemoes.dk Enhanced-by: Dan Williams Link: https://lore.kernel.org/lkml/1d9a2e6df2a9a35b2cdd50a9a68cac5991e7e5f0.camel@intel.com Enhanced-by: Daniel Vetter Link: https://lore.kernel.org/lkml/YQKa76A6XuFqgM03@phenom.ffwll.local Acked-by: Dan Williams Signed-off-by: Kees Cook Stable-dep-of: 58e0be1ef611 ("net: use struct_group to copy ip/ipv6 header addresses") Signed-off-by: Sasha Levin --- include/linux/stddef.h | 48 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/stddef.h | 21 ++++++++++++++++ scripts/kernel-doc | 7 ++++++ 3 files changed, 76 insertions(+) diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 998a4ba28eba..938216f8ab7e 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -36,4 +36,52 @@ enum { #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +/** + * struct_group() - Wrap a set of declarations in a mirrored struct + * + * @NAME: The identifier name of the mirrored sub-struct + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. + */ +#define struct_group(NAME, MEMBERS...) 
\ + __struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS) + +/** + * struct_group_attr() - Create a struct_group() with trailing attributes + * + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes to apply + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. Includes structure attributes argument. + */ +#define struct_group_attr(NAME, ATTRS, MEMBERS...) \ + __struct_group(/* no tag */, NAME, ATTRS, MEMBERS) + +/** + * struct_group_tagged() - Create a struct_group with a reusable tag + * + * @TAG: The tag name for the named sub-struct + * @NAME: The identifier name of the mirrored sub-struct + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. Includes struct tag argument for the named copy, + * so the specified layout can be reused later. + */ +#define struct_group_tagged(TAG, NAME, MEMBERS...) 
\ + __struct_group(TAG, NAME, /* no attrs */, MEMBERS) + #endif diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index ee8220f8dcf5..610204f7c275 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -4,3 +4,24 @@ #ifndef __always_inline #define __always_inline inline #endif + +/** + * __struct_group() - Create a mirrored named and anonyomous struct + * + * @TAG: The tag name for the named sub-struct (usually empty) + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes (usually empty) + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical layout + * and size: one anonymous and one named. The former's members can be used + * normally without sub-struct naming, and the latter can be used to + * reason about the start, end, and size of the group of struct members. + * The named struct can also be explicitly tagged for layer reuse, as well + * as both having struct attributes appended. + */ +#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) 
\ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ + } diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 6325bec3f66f..19af6dd160e6 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1215,6 +1215,13 @@ sub dump_struct($$) { $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; $members =~ s/\s*____cacheline_aligned/ /gos; + # unwrap struct_group(): + # - first eat non-declaration parameters and rewrite for final match + # - then remove macro, outer parens, and trailing semicolon + $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos; + $members =~ s/\bstruct_group_(attr|tagged)\s*\(([^,]*,){2}/STRUCT_GROUP(/gos; + $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos; + $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos; # replace DECLARE_BITMAP $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; From 6e9334436d78d1cf12919807e4124032f02650d6 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 15 Nov 2022 22:24:00 +0800 Subject: [PATCH 0912/1477] net: use struct_group to copy ip/ipv6 header addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 58e0be1ef6118c5352b56a4d06e974c5599993a5 ] kernel test robot reported warnings when build bonding module with make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/net/bonding/: from ../drivers/net/bonding/bond_main.c:35: In function ‘fortify_memcpy_chk’, inlined from ‘iph_to_flow_copy_v4addrs’ at ../include/net/ip.h:566:2, inlined from ‘bond_flow_ip’ at ../drivers/net/bonding/bond_main.c:3984:3: ../include/linux/fortify-string.h:413:25: warning: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of f ield (2nd parameter); maybe use struct_group()? 
[-Wattribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function ‘fortify_memcpy_chk’, inlined from ‘iph_to_flow_copy_v6addrs’ at ../include/net/ipv6.h:900:2, inlined from ‘bond_flow_ip’ at ../drivers/net/bonding/bond_main.c:3994:3: ../include/linux/fortify-string.h:413:25: warning: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of f ield (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is because we try to copy the whole ip/ip6 address to the flow_key, while we only point the to ip/ip6 saddr. Note that since these are UAPI headers, __struct_group() is used to avoid the compiler warnings. Reported-by: kernel test robot Fixes: c3f8324188fa ("net: Add full IPv6 addresses to flow_keys") Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20221115142400.1204786-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- include/net/ip.h | 2 +- include/net/ipv6.h | 2 +- include/uapi/linux/ip.h | 6 ++++-- include/uapi/linux/ipv6.h | 6 ++++-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index c5822d7824cd..4b775af57268 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -545,7 +545,7 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) != offsetof(typeof(flow->addrs), v4addrs.src) + sizeof(flow->addrs.v4addrs.src)); - memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs)); + memcpy(&flow->addrs.v4addrs, &iph->addrs, sizeof(flow->addrs.v4addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 60601896d474..89ce8a50f236 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -842,7 +842,7 @@ static inline 
void iph_to_flow_copy_v6addrs(struct flow_keys *flow, BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) != offsetof(typeof(flow->addrs), v6addrs.src) + sizeof(flow->addrs.v6addrs.src)); - memcpy(&flow->addrs.v6addrs, &iph->saddr, sizeof(flow->addrs.v6addrs)); + memcpy(&flow->addrs.v6addrs, &iph->addrs, sizeof(flow->addrs.v6addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index e42d13b55cf3..d2f143393780 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -100,8 +100,10 @@ struct iphdr { __u8 ttl; __u8 protocol; __sum16 check; - __be32 saddr; - __be32 daddr; + __struct_group(/* no tag */, addrs, /* no attrs */, + __be32 saddr; + __be32 daddr; + ); /*The options start here. */ }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 13e8751bf24a..766ab5c8ee65 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -130,8 +130,10 @@ struct ipv6hdr { __u8 nexthdr; __u8 hop_limit; - struct in6_addr saddr; - struct in6_addr daddr; + __struct_group(/* no tag */, addrs, /* no attrs */, + struct in6_addr saddr; + struct in6_addr daddr; + ); }; From 75205f1b47a88c3fac4f30bd7567e89b2887c7fd Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 15 Nov 2022 09:50:42 +0800 Subject: [PATCH 0913/1477] scsi: target: tcm_loop: Fix possible name leak in tcm_loop_setup_hba_bus() [ Upstream commit bc68e428d4963af0201e92159629ab96948f0893 ] If device_register() fails in tcm_loop_setup_hba_bus(), the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(). The 'tl_hba' will be freed in tcm_loop_release_adapter(), so it don't need goto error label in this case. 
Fixes: 3703b2c5d041 ("[SCSI] tcm_loop: Add multi-fabric Linux/SCSI LLD fabric module") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221115015042.3652261-1-yangyingliang@huawei.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/loopback/tcm_loop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 16d5a4e117a2..5ae5d94c5b93 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -394,6 +394,7 @@ static int tcm_loop_setup_hba_bus(struct tcm_loop_hba *tl_hba, int tcm_loop_host ret = device_register(&tl_hba->dev); if (ret) { pr_err("device_register() failed for tl_hba->dev: %d\n", ret); + put_device(&tl_hba->dev); return -ENODEV; } @@ -1072,7 +1073,7 @@ check_len: */ ret = tcm_loop_setup_hba_bus(tl_hba, tcm_loop_hba_no_cnt); if (ret) - goto out; + return ERR_PTR(ret); sh = tl_hba->sh; tcm_loop_hba_no_cnt++; From 89ece5ff7dbed52348502db603d5c6bc52b90218 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 17 Nov 2022 08:44:21 +0000 Subject: [PATCH 0914/1477] scsi: scsi_debug: Fix possible UAF in sdebug_add_host_helper() [ Upstream commit e208a1d795a08d1ac0398c79ad9c58106531bcc5 ] If device_register() fails in sdebug_add_host_helper(), it will goto clean and sdbg_host will be freed, but sdbg_host->host_list will not be removed from sdebug_host_list, then list traversal may cause UAF. Fix it. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221117084421.58918-1-yuancan@huawei.com Acked-by: Douglas Gilbert Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_debug.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 5eb959b5f701..261b915835b4 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -7079,8 +7079,12 @@ static int sdebug_add_host_helper(int per_host_idx) dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts); error = device_register(&sdbg_host->dev); - if (error) + if (error) { + spin_lock(&sdebug_host_list_lock); + list_del(&sdbg_host->host_list); + spin_unlock(&sdebug_host_list_lock); goto clean; + } ++sdebug_num_hosts; return 0; From 7d606ae1abcc3eab5408e42444d789dc7def51b8 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Fri, 18 Nov 2022 10:15:34 +0900 Subject: [PATCH 0915/1477] kprobes: Skip clearing aggrprobe's post_handler in kprobe-on-ftrace case [ Upstream commit 5dd7caf0bdc5d0bae7cf9776b4d739fb09bd5ebb ] In __unregister_kprobe_top(), if the currently unregistered probe has post_handler but other child probes of the aggrprobe do not have post_handler, the post_handler of the aggrprobe is cleared. If this is a ftrace-based probe, there is a problem. In later calls to disarm_kprobe(), we will use kprobe_ftrace_ops because post_handler is NULL. But we're armed with kprobe_ipmodify_ops. This triggers a WARN in __disarm_kprobe_ftrace() and may even cause use-after-free: Failed to disarm kprobe-ftrace at kernel_clone+0x0/0x3c0 (error -2) WARNING: CPU: 5 PID: 137 at kernel/kprobes.c:1135 __disarm_kprobe_ftrace.isra.21+0xcf/0xe0 Modules linked in: testKprobe_007(-) CPU: 5 PID: 137 Comm: rmmod Not tainted 6.1.0-rc4-dirty #18 [...] Call Trace: __disable_kprobe+0xcd/0xe0 __unregister_kprobe_top+0x12/0x150 ? mutex_lock+0xe/0x30 unregister_kprobes.part.23+0x31/0xa0 unregister_kprobe+0x32/0x40 __x64_sys_delete_module+0x15e/0x260 ? do_user_addr_fault+0x2cd/0x6b0 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] 
For the kprobe-on-ftrace case, we keep the post_handler setting to identify this aggrprobe armed with kprobe_ipmodify_ops. This way we can disarm it correctly. Link: https://lore.kernel.org/all/20221112070000.35299-1-lihuafei1@huawei.com/ Fixes: 0bc11ed5ab60 ("kprobes: Allow kprobes coexist with livepatch") Reported-by: Zhao Gongyi Suggested-by: Masami Hiramatsu (Google) Signed-off-by: Li Huafei Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Sasha Levin --- kernel/kprobes.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index b0f444e86487..75150e755518 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1841,7 +1841,13 @@ static int __unregister_kprobe_top(struct kprobe *p) if ((list_p != p) && (list_p->post_handler)) goto noclean; } - ap->post_handler = NULL; + /* + * For the kprobe-on-ftrace case, we keep the + * post_handler setting to identify this aggrprobe + * armed with kprobe_ipmodify_ops. + */ + if (!kprobe_ftrace(ap)) + ap->post_handler = NULL; } noclean: /* From 3f25add5ecf88de0f8ff2b27b6c0731a1f1b38ed Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Fri, 18 Nov 2022 15:40:03 -0800 Subject: [PATCH 0916/1477] Input: i8042 - fix leaking of platform device on module removal [ Upstream commit 81cd7e8489278d28794e7b272950c3e00c344e44 ] Avoid resetting the module-wide i8042_platform_device pointer in i8042_probe() or i8042_remove(), so that the device can be properly destroyed by i8042_exit() on module unload. 
Fixes: 9222ba68c3f4 ("Input: i8042 - add deferred probe support") Signed-off-by: Chen Jun Link: https://lore.kernel.org/r/20221109034148.23821-1-chenjun102@huawei.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/serio/i8042.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index a9f68f535b72..8648b4c46138 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1543,8 +1543,6 @@ static int i8042_probe(struct platform_device *dev) { int error; - i8042_platform_device = dev; - if (i8042_reset == I8042_RESET_ALWAYS) { error = i8042_controller_selftest(); if (error) @@ -1582,7 +1580,6 @@ static int i8042_probe(struct platform_device *dev) i8042_free_aux_ports(); /* in case KBD failed but AUX not */ i8042_free_irqs(); i8042_controller_reset(false); - i8042_platform_device = NULL; return error; } @@ -1592,7 +1589,6 @@ static int i8042_remove(struct platform_device *dev) i8042_unregister_ports(); i8042_free_irqs(); i8042_controller_reset(false); - i8042_platform_device = NULL; return 0; } From 95ebea5a15e465385bc2d8178d2f18b7cdba9b03 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 29 Mar 2022 10:12:52 -0700 Subject: [PATCH 0917/1477] uapi/linux/stddef.h: Add include guards commit 55037ed7bdc62151a726f5685f88afa6a82959b1 upstream. Add include guard wrapper define to uapi/linux/stddef.h to prevent macro redefinition errors when stddef.h is included more than once. This was not needed before since the only contents already used a redefinition test. 
Signed-off-by: Tadeusz Struk Link: https://lore.kernel.org/r/20220329171252.57279-1-tadeusz.struk@linaro.org Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/stddef.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 610204f7c275..c3725b492263 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_STDDEF_H +#define _UAPI_LINUX_STDDEF_H + #include #ifndef __always_inline @@ -25,3 +28,4 @@ struct { MEMBERS } ATTRS; \ struct TAG { MEMBERS } ATTRS NAME; \ } +#endif From e929ec98c0c3b10d9c07f3776df0c1a02d7a763e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Oct 2022 15:57:43 -0700 Subject: [PATCH 0918/1477] macvlan: enforce a consistent minimal mtu commit b64085b00044bdf3cd1c9825e9ef5b2e0feae91a upstream. macvlan should enforce a minimal mtu of 68, even at link creation. This patch avoids the current behavior (which could lead to crashes in ipv6 stack if the link is brought up) $ ip link add macvlan1 link eno1 mtu 8 type macvlan # This should fail ! $ ip link sh dev macvlan1 5: macvlan1@eno1: mtu 8 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 02:47:6c:24:74:82 brd ff:ff:ff:ff:ff:ff $ ip link set macvlan1 mtu 67 Error: mtu less than device minimum. $ ip link set macvlan1 mtu 68 $ ip link set macvlan1 mtu 8 Error: mtu less than device minimum. Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 5d6b4f76b519..5869bc2c3aa7 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1176,7 +1176,7 @@ void macvlan_common_setup(struct net_device *dev) { ether_setup(dev); - dev->min_mtu = 0; + /* ether_setup() has set dev->min_mtu to ETH_MIN_MTU. */ dev->max_mtu = ETH_MAX_MTU; dev->priv_flags &= ~IFF_TX_SKB_SHARING; netif_keep_dst(dev); From 35309be06b6feded2ab2cafbc2bca8534c2fa41e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Oct 2022 15:07:48 -0700 Subject: [PATCH 0919/1477] tcp: cdg: allow tcp_cdg_release() to be called multiple times commit 72e560cb8c6f80fc2b4afc5d3634a32465e13a51 upstream. Apparently, mptcp is able to call tcp_disconnect() on an already disconnected flow. This is generally fine, unless current congestion control is CDG, because it might trigger a double-free [1] Instead of fixing MPTCP, and future bugs, we can make tcp_disconnect() more resilient. 
[1] BUG: KASAN: double-free in slab_free mm/slub.c:3539 [inline] BUG: KASAN: double-free in kfree+0xe2/0x580 mm/slub.c:4567 CPU: 0 PID: 3645 Comm: kworker/0:7 Not tainted 6.0.0-syzkaller-02734-g0326074ff465 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 Workqueue: events mptcp_worker Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:317 [inline] print_report.cold+0x2ba/0x719 mm/kasan/report.c:433 kasan_report_invalid_free+0x81/0x190 mm/kasan/report.c:462 ____kasan_slab_free+0x18b/0x1c0 mm/kasan/common.c:356 kasan_slab_free include/linux/kasan.h:200 [inline] slab_free_hook mm/slub.c:1759 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1785 slab_free mm/slub.c:3539 [inline] kfree+0xe2/0x580 mm/slub.c:4567 tcp_disconnect+0x980/0x1e20 net/ipv4/tcp.c:3145 __mptcp_close_ssk+0x5ca/0x7e0 net/mptcp/protocol.c:2327 mptcp_do_fastclose net/mptcp/protocol.c:2592 [inline] mptcp_worker+0x78c/0xff0 net/mptcp/protocol.c:2627 process_one_work+0x991/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e4/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 Allocated by task 3671: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:45 [inline] set_alloc_info mm/kasan/common.c:437 [inline] ____kasan_kmalloc mm/kasan/common.c:516 [inline] ____kasan_kmalloc mm/kasan/common.c:475 [inline] __kasan_kmalloc+0xa9/0xd0 mm/kasan/common.c:525 kmalloc_array include/linux/slab.h:640 [inline] kcalloc include/linux/slab.h:671 [inline] tcp_cdg_init+0x10d/0x170 net/ipv4/tcp_cdg.c:380 tcp_init_congestion_control+0xab/0x550 net/ipv4/tcp_cong.c:193 tcp_reinit_congestion_control net/ipv4/tcp_cong.c:217 [inline] tcp_set_congestion_control+0x96c/0xaa0 net/ipv4/tcp_cong.c:391 do_tcp_setsockopt+0x505/0x2320 net/ipv4/tcp.c:3513 tcp_setsockopt+0xd4/0x100 
net/ipv4/tcp.c:3801 mptcp_setsockopt+0x35f/0x2570 net/mptcp/sockopt.c:844 __sys_setsockopt+0x2d6/0x690 net/socket.c:2252 __do_sys_setsockopt net/socket.c:2263 [inline] __se_sys_setsockopt net/socket.c:2260 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2260 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 16: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:45 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:367 [inline] ____kasan_slab_free+0x166/0x1c0 mm/kasan/common.c:329 kasan_slab_free include/linux/kasan.h:200 [inline] slab_free_hook mm/slub.c:1759 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1785 slab_free mm/slub.c:3539 [inline] kfree+0xe2/0x580 mm/slub.c:4567 tcp_cleanup_congestion_control+0x70/0x120 net/ipv4/tcp_cong.c:226 tcp_v4_destroy_sock+0xdd/0x750 net/ipv4/tcp_ipv4.c:2254 tcp_v6_destroy_sock+0x11/0x20 net/ipv6/tcp_ipv6.c:1969 inet_csk_destroy_sock+0x196/0x440 net/ipv4/inet_connection_sock.c:1157 tcp_done+0x23b/0x340 net/ipv4/tcp.c:4649 tcp_rcv_state_process+0x40e7/0x4990 net/ipv4/tcp_input.c:6624 tcp_v6_do_rcv+0x3fc/0x13c0 net/ipv6/tcp_ipv6.c:1525 tcp_v6_rcv+0x2e8e/0x3830 net/ipv6/tcp_ipv6.c:1759 ip6_protocol_deliver_rcu+0x2db/0x1950 net/ipv6/ip6_input.c:439 ip6_input_finish+0x14c/0x2c0 net/ipv6/ip6_input.c:484 NF_HOOK include/linux/netfilter.h:302 [inline] NF_HOOK include/linux/netfilter.h:296 [inline] ip6_input+0x9c/0xd0 net/ipv6/ip6_input.c:493 dst_input include/net/dst.h:455 [inline] ip6_rcv_finish+0x193/0x2c0 net/ipv6/ip6_input.c:79 ip_sabotage_in net/bridge/br_netfilter_hooks.c:874 [inline] ip_sabotage_in+0x1fa/0x260 net/bridge/br_netfilter_hooks.c:865 nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] nf_hook_slow+0xc5/0x1f0 net/netfilter/core.c:614 nf_hook.constprop.0+0x3ac/0x650 include/linux/netfilter.h:257 NF_HOOK 
include/linux/netfilter.h:300 [inline] ipv6_rcv+0x9e/0x380 net/ipv6/ip6_input.c:309 __netif_receive_skb_one_core+0x114/0x180 net/core/dev.c:5485 __netif_receive_skb+0x1f/0x1c0 net/core/dev.c:5599 netif_receive_skb_internal net/core/dev.c:5685 [inline] netif_receive_skb+0x12f/0x8d0 net/core/dev.c:5744 NF_HOOK include/linux/netfilter.h:302 [inline] NF_HOOK include/linux/netfilter.h:296 [inline] br_pass_frame_up+0x303/0x410 net/bridge/br_input.c:68 br_handle_frame_finish+0x909/0x1aa0 net/bridge/br_input.c:199 br_nf_hook_thresh+0x2f8/0x3d0 net/bridge/br_netfilter_hooks.c:1041 br_nf_pre_routing_finish_ipv6+0x695/0xef0 net/bridge/br_netfilter_ipv6.c:207 NF_HOOK include/linux/netfilter.h:302 [inline] br_nf_pre_routing_ipv6+0x417/0x7c0 net/bridge/br_netfilter_ipv6.c:237 br_nf_pre_routing+0x1496/0x1fe0 net/bridge/br_netfilter_hooks.c:507 nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] nf_hook_bridge_pre net/bridge/br_input.c:255 [inline] br_handle_frame+0x9c9/0x12d0 net/bridge/br_input.c:399 __netif_receive_skb_core+0x9fe/0x38f0 net/core/dev.c:5379 __netif_receive_skb_one_core+0xae/0x180 net/core/dev.c:5483 __netif_receive_skb+0x1f/0x1c0 net/core/dev.c:5599 process_backlog+0x3a0/0x7c0 net/core/dev.c:5927 __napi_poll+0xb3/0x6d0 net/core/dev.c:6494 napi_poll net/core/dev.c:6561 [inline] net_rx_action+0x9c1/0xd90 net/core/dev.c:6672 __do_softirq+0x1d0/0x9c8 kernel/softirq.c:571 Fixes: 2b0a8c9eee81 ("tcp: add CDG congestion control") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_cdg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 709d23801823..56dede4b59d9 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -375,6 +375,7 @@ static void tcp_cdg_init(struct sock *sk) struct cdg *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); + ca->gradients = NULL; /* We silently fall back to window = 1 if allocation fails. 
*/ if (window > 1) ca->gradients = kcalloc(window, sizeof(ca->gradients[0]), @@ -388,6 +389,7 @@ static void tcp_cdg_release(struct sock *sk) struct cdg *ca = inet_csk_ca(sk); kfree(ca->gradients); + ca->gradients = NULL; } static struct tcp_congestion_ops tcp_cdg __read_mostly = { From 7deb7a9d33e4941c5ff190108146d3a56bf69e9d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Oct 2022 13:34:12 +0000 Subject: [PATCH 0920/1477] kcm: avoid potential race in kcm_tx_work commit ec7eede369fe5b0d085ac51fdbb95184f87bfc6c upstream. syzbot found that kcm_tx_work() could crash [1] in: /* Primarily for SOCK_SEQPACKET sockets */ if (likely(sk->sk_socket) && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { <<*>> clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_space(sk); } I think the reason is that another thread might concurrently run in kcm_release() and call sock_orphan(sk) while sk is not locked. kcm_tx_work() find sk->sk_socket being NULL. [1] BUG: KASAN: null-ptr-deref in instrument_atomic_write include/linux/instrumented.h:86 [inline] BUG: KASAN: null-ptr-deref in clear_bit include/asm-generic/bitops/instrumented-atomic.h:41 [inline] BUG: KASAN: null-ptr-deref in kcm_tx_work+0xff/0x160 net/kcm/kcmsock.c:742 Write of size 8 at addr 0000000000000008 by task kworker/u4:3/53 CPU: 0 PID: 53 Comm: kworker/u4:3 Not tainted 5.19.0-rc3-next-20220621-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: kkcmd kcm_tx_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 kasan_report+0xbe/0x1f0 mm/kasan/report.c:495 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189 instrument_atomic_write include/linux/instrumented.h:86 [inline] clear_bit include/asm-generic/bitops/instrumented-atomic.h:41 [inline] kcm_tx_work+0xff/0x160 net/kcm/kcmsock.c:742 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 
worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Tom Herbert Link: https://lore.kernel.org/r/20221012133412.519394-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/kcm/kcmsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 6b362b362f79..990f69dbedf1 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1844,10 +1844,10 @@ static int kcm_release(struct socket *sock) kcm = kcm_sk(sk); mux = kcm->mux; + lock_sock(sk); sock_orphan(sk); kfree_skb(kcm->seq_skb); - lock_sock(sk); /* Purge queue under lock to avoid race condition with tx_work trying * to act when queue is nonempty. If tx_work runs after this point * it will just return. From 4154b6afa2bd639214ff259d912faad984f7413a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 13 Nov 2022 16:51:19 -0800 Subject: [PATCH 0921/1477] kcm: close race conditions on sk_receive_queue commit 5121197ecc5db58c07da95eb1ff82b98b121a221 upstream. sk->sk_receive_queue is protected by skb queue lock, but for KCM sockets its RX path takes mux->rx_lock to protect more than just skb queue. However, kcm_recvmsg() still only grabs the skb queue lock, so race conditions still exist. We can teach kcm_recvmsg() to grab mux->rx_lock too but this would introduce a potential performance regression as struct kcm_mux can be shared by multiple KCM sockets. So we have to enforce skb queue lock in requeue_rx_msgs() and handle skb peek case carefully in kcm_wait_data(). Fortunately, skb_recv_datagram() already handles it nicely and is widely used by other sockets, we can just switch to skb_recv_datagram() after getting rid of the unnecessary sock lock in kcm_recvmsg() and kcm_splice_read(). 
Side note: SOCK_DONE is not used by KCM sockets, so it is safe to get rid of this check too. I ran the original syzbot reproducer for 30 min without seeing any issue. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+278279efdd2730dd14bf@syzkaller.appspotmail.com Reported-by: shaozhengchao Cc: Paolo Abeni Cc: Tom Herbert Signed-off-by: Cong Wang Link: https://lore.kernel.org/r/20221114005119.597905-1-xiyou.wangcong@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/kcm/kcmsock.c | 60 +++++++---------------------------------------- 1 file changed, 8 insertions(+), 52 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 990f69dbedf1..32b516ab9c47 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -221,7 +221,7 @@ static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head) struct sk_buff *skb; struct kcm_sock *kcm; - while ((skb = __skb_dequeue(head))) { + while ((skb = skb_dequeue(head))) { /* Reset destructor to avoid calling kcm_rcv_ready */ skb->destructor = sock_rfree; skb_orphan(skb); @@ -1084,53 +1084,18 @@ out_error: return err; } -static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, - long timeo, int *err) -{ - struct sk_buff *skb; - - while (!(skb = skb_peek(&sk->sk_receive_queue))) { - if (sk->sk_err) { - *err = sock_error(sk); - return NULL; - } - - if (sock_flag(sk, SOCK_DONE)) - return NULL; - - if ((flags & MSG_DONTWAIT) || !timeo) { - *err = -EAGAIN; - return NULL; - } - - sk_wait_data(sk, &timeo, NULL); - - /* Handle signals */ - if (signal_pending(current)) { - *err = sock_intr_errno(timeo); - return NULL; - } - } - - return skb; -} - static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { + int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); int err = 0; - long timeo; struct strp_msg *stm; int copied = 0; struct sk_buff *skb; - timeo = sock_rcvtimeo(sk, 
flags & MSG_DONTWAIT); - - lock_sock(sk); - - skb = kcm_wait_data(sk, flags, timeo, &err); + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) goto out; @@ -1161,14 +1126,11 @@ msg_finished: /* Finished with message */ msg->msg_flags |= MSG_EOR; KCM_STATS_INCR(kcm->stats.rx_msgs); - skb_unlink(skb, &sk->sk_receive_queue); - kfree_skb(skb); } } out: - release_sock(sk); - + skb_free_datagram(sk, skb); return copied ? : err; } @@ -1176,9 +1138,9 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { + int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); - long timeo; struct strp_msg *stm; int err = 0; ssize_t copied; @@ -1186,11 +1148,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, /* Only support splice for SOCKSEQPACKET */ - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - - lock_sock(sk); - - skb = kcm_wait_data(sk, flags, timeo, &err); + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) goto err_out; @@ -1218,13 +1176,11 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, * finish reading the message. */ - release_sock(sk); - + skb_free_datagram(sk, skb); return copied; err_out: - release_sock(sk); - + skb_free_datagram(sk, skb); return err; } From f14858bc77c567e089965962877ee726ffad0556 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 17 Aug 2022 14:58:44 +0900 Subject: [PATCH 0922/1477] 9p: trans_fd/p9_conn_cancel: drop client lock earlier commit 52f1c45dde9136f964d63a77d19826c8a74e2c7f upstream. syzbot reported a double-lock here and we no longer need this lock after requests have been moved off to local list: just drop the lock earlier. 
Link: https://lkml.kernel.org/r/20220904064028.1305220-1-asmadeus@codewreck.org Reported-by: syzbot+50f7e8d06c3768dd97f3@syzkaller.appspotmail.com Signed-off-by: Dominique Martinet Tested-by: Schspa Shi Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_fd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 8f528e783a6c..c4096b14d159 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -205,6 +205,8 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_move(&req->req_list, &cancel_list); } + spin_unlock(&m->client->lock); + list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); @@ -212,7 +214,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err) req->t_err = err; p9_client_cb(m->client, req, REQ_STATUS_ERROR); } - spin_unlock(&m->client->lock); } static __poll_t From 5fa30be7ba81191b0a0c7239a89befc0c94286d5 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Wed, 17 Aug 2022 13:22:00 +0100 Subject: [PATCH 0923/1477] gfs2: Check sb_bsize_shift after reading superblock commit 670f8ce56dd0632dc29a0322e188cc73ce3c6b92 upstream. Fuzzers like to scribble over sb_bsize_shift but in reality it's very unlikely that this field would be corrupted on its own. Nevertheless it should be checked to avoid the possibility of messy mount errors due to bad calculations. It's always a fixed value based on the block size so we can just check that it's the expected value. 
Tested with: mkfs.gfs2 -O -p lock_nolock /dev/vdb for i in 0 -1 64 65 32 33; do gfs2_edit -p sb field sb_bsize_shift $i /dev/vdb mount /dev/vdb /mnt/test && umount /mnt/test done Before this patch we get a withdraw after [ 76.413681] gfs2: fsid=loop0.0: fatal: invalid metadata block [ 76.413681] bh = 19 (type: exp=5, found=4) [ 76.413681] function = gfs2_meta_buffer, file = fs/gfs2/meta_io.c, line = 492 and with UBSAN configured we also get complaints like [ 76.373395] UBSAN: shift-out-of-bounds in fs/gfs2/ops_fstype.c:295:19 [ 76.373815] shift exponent 4294967287 is too large for 64-bit type 'long unsigned int' After the patch, these complaints don't appear, mount fails immediately and we get an explanation in dmesg. Reported-by: syzbot+dcf33a7aae997956fe06@syzkaller.appspotmail.com Signed-off-by: Andrew Price Signed-off-by: Andreas Gruenbacher Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/ops_fstype.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b9ed6a6dbcf5..23c131568476 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -182,7 +182,10 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) pr_warn("Invalid superblock size\n"); return -EINVAL; } - + if (sb->sb_bsize_shift != ffs(sb->sb_bsize) - 1) { + pr_warn("Invalid block size shift\n"); + return -EINVAL; + } return 0; } From a5da76df467a55071c88c7e2612250e91034e4d7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 26 Aug 2022 15:12:17 +0200 Subject: [PATCH 0924/1477] gfs2: Switch from strlcpy to strscpy commit 204c0300c4e99707e9fb6e57840aa1127060e63f upstream. Switch from strlcpy to strscpy and make sure that @count is the size of the smaller of the source and destination buffers. This prevents reading beyond the end of the source buffer when the source string isn't null terminated. Found by a modified version of syzkaller. 
Suggested-by: Wolfram Sang Signed-off-by: Andreas Gruenbacher Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/ops_fstype.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 23c131568476..648f7336043f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -384,8 +384,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent) if (!table[0]) table = sdp->sd_vfs->s_id; - strlcpy(sdp->sd_proto_name, proto, GFS2_FSNAME_LEN); - strlcpy(sdp->sd_table_name, table, GFS2_FSNAME_LEN); + BUILD_BUG_ON(GFS2_LOCKNAME_LEN > GFS2_FSNAME_LEN); + + strscpy(sdp->sd_proto_name, proto, GFS2_LOCKNAME_LEN); + strscpy(sdp->sd_table_name, table, GFS2_LOCKNAME_LEN); table = sdp->sd_table_name; while ((table = strchr(table, '/'))) @@ -1417,13 +1419,13 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) switch (o) { case Opt_lockproto: - strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN); + strscpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN); break; case Opt_locktable: - strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN); + strscpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN); break; case Opt_hostdata: - strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN); + strscpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN); break; case Opt_spectator: args->ar_spectator = 1; From a8e2fc8f7b41fa9d9ca5f624f4e4d34fce5b40a9 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 27 Aug 2022 00:27:46 +0900 Subject: [PATCH 0925/1477] 9p/trans_fd: always use O_NONBLOCK read/write commit ef575281b21e9a34dfae544a187c6aac2ae424a9 upstream. syzbot is reporting hung task at p9_fd_close() [1], for p9_mux_poll_stop() from p9_conn_destroy() from p9_fd_close() is failing to interrupt already started kernel_read() from p9_fd_read() from p9_read_work() and/or kernel_write() from p9_fd_write() from p9_write_work() requests. 
Since p9_socket_open() sets O_NONBLOCK flag, p9_mux_poll_stop() does not need to interrupt kernel_read()/kernel_write(). However, since p9_fd_open() does not set O_NONBLOCK flag, but pipe blocks unless signal is pending, p9_mux_poll_stop() needs to interrupt kernel_read()/kernel_write() when the file descriptor refers to a pipe. In other words, pipe file descriptor needs to be handled as if socket file descriptor. We somehow need to interrupt kernel_read()/kernel_write() on pipes. A minimal change, which this patch is doing, is to set O_NONBLOCK flag from p9_fd_open(), for O_NONBLOCK flag does not affect reading/writing of regular files. But this approach changes O_NONBLOCK flag on userspace- supplied file descriptors (which might break userspace programs), and O_NONBLOCK flag could be changed by userspace. It would be possible to set O_NONBLOCK flag every time p9_fd_read()/p9_fd_write() is invoked, but still remains small race window for clearing O_NONBLOCK flag. If we don't want to manipulate O_NONBLOCK flag, we might be able to surround kernel_read()/kernel_write() with set_thread_flag(TIF_SIGPENDING) and recalc_sigpending(). Since p9_read_work()/p9_write_work() works are processed by kernel threads which process global system_wq workqueue, signals could not be delivered from remote threads when p9_mux_poll_stop() from p9_conn_destroy() from p9_fd_close() is called. Therefore, calling set_thread_flag(TIF_SIGPENDING)/recalc_sigpending() every time would be needed if we count on signals for making kernel_read()/kernel_write() non-blocking. 
Link: https://lkml.kernel.org/r/345de429-a88b-7097-d177-adecf9fed342@I-love.SAKURA.ne.jp Link: https://syzkaller.appspot.com/bug?extid=8b41a1365f1106fd0f33 [1] Reported-by: syzbot Signed-off-by: Tetsuo Handa Tested-by: syzbot Reviewed-by: Christian Schoenebeck [Dominique: add comment at Christian's suggestion] Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_fd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index c4096b14d159..fec6c800c898 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -821,11 +821,14 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd) goto out_free_ts; if (!(ts->rd->f_mode & FMODE_READ)) goto out_put_rd; + /* prevent workers from hanging on IO when fd is a pipe */ + ts->rd->f_flags |= O_NONBLOCK; ts->wr = fget(wfd); if (!ts->wr) goto out_put_rd; if (!(ts->wr->f_mode & FMODE_WRITE)) goto out_put_wr; + ts->wr->f_flags |= O_NONBLOCK; client->trans = ts; client->status = Connected; From 294ef12dccc6de01de3322b21a0c235474952b63 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 15 Sep 2022 17:04:16 +0200 Subject: [PATCH 0926/1477] mm: fs: initialize fsdata passed to write_begin/write_end interface commit 1468c6f4558b1bcd92aa0400f2920f9dc7588402 upstream. Functions implementing the a_ops->write_end() interface accept the `void *fsdata` parameter that is supposed to be initialized by the corresponding a_ops->write_begin() (which accepts `void **fsdata`). However not all a_ops->write_begin() implementations initialize `fsdata` unconditionally, so it may get passed uninitialized to a_ops->write_end(), resulting in undefined behavior. Fix this by initializing fsdata with NULL before the call to write_begin(), rather than doing so in all possible a_ops implementations. 
This patch covers only the following cases found by running x86 KMSAN under syzkaller: - generic_perform_write() - cont_expand_zero() and generic_cont_expand_simple() - page_symlink() Other cases of passing uninitialized fsdata may persist in the codebase. Link: https://lkml.kernel.org/r/20220915150417.722975-43-glider@google.com Signed-off-by: Alexander Potapenko Cc: Alexander Viro Cc: Alexei Starovoitov Cc: Andrey Konovalov Cc: Andrey Konovalov Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Christoph Hellwig Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Eric Biggers Cc: Eric Dumazet Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Ilya Leoshkevich Cc: Ingo Molnar Cc: Jens Axboe Cc: Joonsoo Kim Cc: Kees Cook Cc: Marco Elver Cc: Mark Rutland Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Petr Mladek Cc: Stephen Rothwell Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vegard Nossum Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/buffer.c | 4 ++-- fs/namei.c | 2 +- mm/filemap.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 23f645657488..ee66abadcbc2 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2350,7 +2350,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size) { struct address_space *mapping = inode->i_mapping; struct page *page; - void *fsdata; + void *fsdata = NULL; int err; err = inode_newsize_ok(inode, size); @@ -2376,7 +2376,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; unsigned int blocksize = i_blocksize(inode); struct page *page; - void *fsdata; + void *fsdata = NULL; pgoff_t index, curidx; loff_t curpos; unsigned zerofrom, offset, len; diff --git a/fs/namei.c b/fs/namei.c index eba2f13d229d..4375565aca66 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4633,7 +4633,7 @@ int 
__page_symlink(struct inode *inode, const char *symname, int len, int nofs) { struct address_space *mapping = inode->i_mapping; struct page *page; - void *fsdata; + void *fsdata = NULL; int err; unsigned int flags = 0; if (nofs) diff --git a/mm/filemap.c b/mm/filemap.c index 125b69f59caa..3a983bc1a71c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3303,7 +3303,7 @@ ssize_t generic_perform_write(struct file *file, unsigned long offset; /* Offset into pagecache page */ unsigned long bytes; /* Bytes to write to page */ size_t copied; /* Bytes copied from user */ - void *fsdata; + void *fsdata = NULL; offset = (pos & (PAGE_SIZE - 1)); bytes = min_t(unsigned long, PAGE_SIZE - offset, From b825bfbbaafbe8da2037e3a778ad660c59f9e054 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Thu, 1 Sep 2022 00:09:34 +0800 Subject: [PATCH 0927/1477] ntfs: fix use-after-free in ntfs_attr_find() commit d85a1bec8e8d552ab13163ca1874dcd82f3d1550 upstream. Patch series "ntfs: fix bugs about Attribute", v2. This patchset fixes three bugs relative to Attribute in record: Patch 1 adds a sanity check to ensure that, attrs_offset field in first mft record loading from disk is within bounds. Patch 2 moves the ATTR_RECORD's bounds checking earlier, to avoid dereferencing ATTR_RECORD before checking this ATTR_RECORD is within bounds. Patch 3 adds an overflow check to avoid a possible infinite loop in ntfs_attr_find(). Without patch 1 and patch 2, the kernel triggers a KASAN use-after-free detection as reported by Syzkaller. Although one of patch 1 or patch 2 can fix this, we still need both of them, because patch 1 fixes the root cause, and patch 2 not only fixes the direct cause, but also fixes the potential out-of-bounds bug.
This patch (of 3): Syzkaller reported use-after-free read as follows: ================================================================== BUG: KASAN: use-after-free in ntfs_attr_find+0xc02/0xce0 fs/ntfs/attrib.c:597 Read of size 2 at addr ffff88807e352009 by task syz-executor153/3607 [...] Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:317 [inline] print_report.cold+0x2ba/0x719 mm/kasan/report.c:433 kasan_report+0xb1/0x1e0 mm/kasan/report.c:495 ntfs_attr_find+0xc02/0xce0 fs/ntfs/attrib.c:597 ntfs_attr_lookup+0x1056/0x2070 fs/ntfs/attrib.c:1193 ntfs_read_inode_mount+0x89a/0x2580 fs/ntfs/inode.c:1845 ntfs_fill_super+0x1799/0x9320 fs/ntfs/super.c:2854 mount_bdev+0x34d/0x410 fs/super.c:1400 legacy_get_tree+0x105/0x220 fs/fs_context.c:610 vfs_get_tree+0x89/0x2f0 fs/super.c:1530 do_new_mount fs/namespace.c:3040 [inline] path_mount+0x1326/0x1e20 fs/namespace.c:3370 do_mount fs/namespace.c:3383 [inline] __do_sys_mount fs/namespace.c:3591 [inline] __se_sys_mount fs/namespace.c:3568 [inline] __x64_sys_mount+0x27f/0x300 fs/namespace.c:3568 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] 
The buggy address belongs to the physical page: page:ffffea0001f8d400 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x7e350 head:ffffea0001f8d400 order:3 compound_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 0000000000000000 dead000000000122 ffff888011842140 raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88807e351f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88807e351f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88807e352000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88807e352080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88807e352100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== The kernel loads $MFT/$DATA's first mft record in ntfs_read_inode_mount(). Yet the problem is that after loading, the kernel doesn't check whether the attrs_offset field is a valid value. To be more specific, if the attrs_offset field is larger than the bytes_allocated field, then it may trigger the out-of-bounds read bug (reported as a use-after-free bug) in ntfs_attr_find(), when the kernel tries to access the corresponding mft record's attribute. This patch solves it by adding a sanity check between the attrs_offset field and the bytes_allocated field, after loading the first mft record.
Link: https://lkml.kernel.org/r/20220831160935.3409-1-yin31149@gmail.com Link: https://lkml.kernel.org/r/20220831160935.3409-2-yin31149@gmail.com Signed-off-by: Hawkins Jiawei Cc: Anton Altaparmakov Cc: ChenXiaoSong Cc: syzkaller-bugs Cc: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ntfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index cf222c9225d6..645c4b1b23de 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1829,6 +1829,13 @@ int ntfs_read_inode_mount(struct inode *vi) goto err_out; } + /* Sanity check offset to the first attribute */ + if (le16_to_cpu(m->attrs_offset) >= le32_to_cpu(m->bytes_allocated)) { + ntfs_error(sb, "Incorrect mft offset to the first attribute %u in superblock.", + le16_to_cpu(m->attrs_offset)); + goto err_out; + } + /* Need this to sanity check attribute list references to $MFT. */ vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number); From 6322dda483344abe47d17335809f7bbb730bd88b Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Thu, 1 Sep 2022 00:09:36 +0800 Subject: [PATCH 0928/1477] ntfs: fix out-of-bounds read in ntfs_attr_find() commit 36a4d82dddbbd421d2b8e79e1cab68c8126d5075 upstream. The kernel iterates over ATTR_RECORDs in the mft record in ntfs_attr_find(). To ensure accesses to these ATTR_RECORDs are within bounds, the kernel does some checking during iteration. The problem is that while checking whether the ATTR_RECORD's name is within bounds, the kernel dereferences the ATTR_RECORD name_offset field before checking that this ATTR_RECORD structure is within bounds. This problem may result in an out-of-bounds read in ntfs_attr_find(), reported by Syzkaller: ================================================================== BUG: KASAN: use-after-free in ntfs_attr_find+0xc02/0xce0 fs/ntfs/attrib.c:597 Read of size 2 at addr ffff88807e352009 by task syz-executor153/3607 [...]
Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:317 [inline] print_report.cold+0x2ba/0x719 mm/kasan/report.c:433 kasan_report+0xb1/0x1e0 mm/kasan/report.c:495 ntfs_attr_find+0xc02/0xce0 fs/ntfs/attrib.c:597 ntfs_attr_lookup+0x1056/0x2070 fs/ntfs/attrib.c:1193 ntfs_read_inode_mount+0x89a/0x2580 fs/ntfs/inode.c:1845 ntfs_fill_super+0x1799/0x9320 fs/ntfs/super.c:2854 mount_bdev+0x34d/0x410 fs/super.c:1400 legacy_get_tree+0x105/0x220 fs/fs_context.c:610 vfs_get_tree+0x89/0x2f0 fs/super.c:1530 do_new_mount fs/namespace.c:3040 [inline] path_mount+0x1326/0x1e20 fs/namespace.c:3370 do_mount fs/namespace.c:3383 [inline] __do_sys_mount fs/namespace.c:3591 [inline] __se_sys_mount fs/namespace.c:3568 [inline] __x64_sys_mount+0x27f/0x300 fs/namespace.c:3568 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] The buggy address belongs to the physical page: page:ffffea0001f8d400 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x7e350 head:ffffea0001f8d400 order:3 compound_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 0000000000000000 dead000000000122 ffff888011842140 raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88807e351f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88807e351f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88807e352000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88807e352080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88807e352100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== This patch solves it by moving the ATTR_RECORD strcture's bounds checking earlier, then 
checking whether ATTR_RECORD's name is within bounds. What's more, this patch also add some comments to improve its maintainability. Link: https://lkml.kernel.org/r/20220831160935.3409-3-yin31149@gmail.com Link: https://lore.kernel.org/all/1636796c-c85e-7f47-e96f-e074fee3c7d3@huawei.com/ Link: https://groups.google.com/g/syzkaller-bugs/c/t_XdeKPGTR4/m/LECAuIGcBgAJ Signed-off-by: chenxiaosong (A) Signed-off-by: Dan Carpenter Signed-off-by: Hawkins Jiawei Reported-by: syzbot+5f8dcabe4a3b2c51c607@syzkaller.appspotmail.com Tested-by: syzbot+5f8dcabe4a3b2c51c607@syzkaller.appspotmail.com Cc: Anton Altaparmakov Cc: syzkaller-bugs Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ntfs/attrib.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 914e99173130..f171563e5106 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -594,11 +594,23 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { u8 *mrec_end = (u8 *)ctx->mrec + le32_to_cpu(ctx->mrec->bytes_allocated); - u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) + - a->name_length * sizeof(ntfschar); - if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end || - name_end > mrec_end) + u8 *name_end; + + /* check whether ATTR_RECORD wrap */ + if ((u8 *)a < (u8 *)ctx->mrec) break; + + /* check whether Attribute Record Header is within bounds */ + if ((u8 *)a > mrec_end || + (u8 *)a + sizeof(ATTR_RECORD) > mrec_end) + break; + + /* check whether ATTR_RECORD's name is within bounds */ + name_end = (u8 *)a + le16_to_cpu(a->name_offset) + + a->name_length * sizeof(ntfschar); + if (name_end > mrec_end) + break; + ctx->attr = a; if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || a->type == AT_END)) From 957732a09c3828267c2819d31c425aa793dd475b Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Thu, 1 Sep 2022 00:09:38 +0800 Subject: [PATCH 
0929/1477] ntfs: check overflow when iterating ATTR_RECORDs commit 63095f4f3af59322bea984a6ae44337439348fe0 upstream. Kernel iterates over ATTR_RECORDs in mft record in ntfs_attr_find(). Because the ATTR_RECORDs are next to each other, kernel can get the next ATTR_RECORD from end address of current ATTR_RECORD, through current ATTR_RECORD length field. The problem is that during iteration, when kernel calculates the end address of current ATTR_RECORD, kernel may trigger an integer overflow bug in executing `a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))`. This may wrap, leading to a forever iteration on 32bit systems. This patch solves it by adding some checks on calculating end address of current ATTR_RECORD during iteration. Link: https://lkml.kernel.org/r/20220831160935.3409-4-yin31149@gmail.com Link: https://lore.kernel.org/all/20220827105842.GM2030@kadam/ Signed-off-by: Hawkins Jiawei Suggested-by: Dan Carpenter Cc: Anton Altaparmakov Cc: chenxiaosong (A) Cc: syzkaller-bugs Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ntfs/attrib.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index f171563e5106..c0881d39d36a 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -617,6 +617,14 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, return -ENOENT; if (unlikely(!a->length)) break; + + /* check whether ATTR_RECORD's length wrap */ + if ((u8 *)a + le32_to_cpu(a->length) < (u8 *)a) + break; + /* check whether ATTR_RECORD's length is within bounds */ + if ((u8 *)a + le32_to_cpu(a->length) > mrec_end) + break; + if (a->type != type) continue; /* From 7be134eb691f6a54b267dbc321530ce0221a76b1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 25 Nov 2022 15:51:06 +0100 Subject: [PATCH 0930/1477] Revert "net: broadcom: Fix BCMGENET Kconfig" This reverts commit fbb4e8e6dc7b38b3007354700f03c8ad2d9a2118 which is commit 8d820bc9d12b8beebca836cceaf2bbe68216c2f8 
upstream. It causes runtime failures as reported by Naresh and Arnd writes: Greg, please just revert fbb4e8e6dc7b ("net: broadcom: Fix BCMGENET Kconfig") in stable/linux-5.10.y: it depends on e5f31552674e ("ethernet: fix PTP_1588_CLOCK dependencies"), which we probably don't want backported from 5.15 to 5.10. So it should be reverted. Reported-by: Naresh Kamboju Reported-by: Arnd Bergmann Link: https://lore.kernel.org/r/CA+G9fYsXomPXcecPDzDydO3=i2qHDM2RTtGxr0p2YOS6=YcWng@mail.gmail.com Cc: YueHaibing Cc: Florian Fainelli Cc: Jakub Kicinski Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 2b6d929d462f..7b79528d6eed 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -69,7 +69,7 @@ config BCMGENET select BCM7XXX_PHY select MDIO_BCM_UNIMAC select DIMLIB - select BROADCOM_PHY if (ARCH_BCM2835 && PTP_1588_CLOCK_OPTIONAL) + select BROADCOM_PHY if ARCH_BCM2835 help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset. 
From 6d46ef50b123f2da3871690e619f5169eb97af92 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 25 Nov 2022 17:45:57 +0100 Subject: [PATCH 0931/1477] Linux 5.10.156 Link: https://lore.kernel.org/r/20221123084557.945845710@linuxfoundation.org Tested-by: Rudi Heitbaum Tested-by: Pavel Machek (CIP) Tested-by: Jon Hunter Tested-by: Guenter Roeck Tested-by: Sudip Mukherjee Tested-by: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8ccf902b3609..166f87bdc190 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 155 +SUBLEVEL = 156 EXTRAVERSION = NAME = Dare mighty things From 5bc3ece38082faf89a30e03f510de17ef0425509 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 1 Dec 2022 10:32:23 +0000 Subject: [PATCH 0932/1477] Revert "serial: 8250: Let drivers request full 16550A feature probing" This reverts commit e6da7808c955e609b3adf9d18b02f94463616016 which is commit 9906890c89e4dbd900ed87ad3040080339a7f411 upstream. This changes the kernel API and is not needed at all in the 5.10.y branch and will be reverted upstream as well. 
Change-Id: I7d629d3765be366b77aafabf0e89fac7a5aa73b9 Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 3 +-- include/linux/serial_core.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 23eb1ce4cf00..e6cadb23850a 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1023,8 +1023,7 @@ static void autoconfig_16550a(struct uart_8250_port *up) up->port.type = PORT_16550A; up->capabilities |= UART_CAP_FIFO; - if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS) && - !(up->port.flags & UPF_FULL_PROBE)) + if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS)) return; /* diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index b16373d10e43..9f60f549d956 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -104,7 +104,7 @@ struct uart_icount { __u32 buf_overrun; }; -typedef u64 __bitwise upf_t; +typedef unsigned int __bitwise upf_t; typedef unsigned int __bitwise upstat_t; struct uart_port { @@ -211,7 +211,6 @@ struct uart_port { #define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) #define UPF_DEAD ((__force upf_t) (1 << 30)) #define UPF_IOREMAP ((__force upf_t) (1 << 31)) -#define UPF_FULL_PROBE ((__force upf_t) (1ULL << 32)) #define __UPF_CHANGE_MASK 0x17fff #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) From e5d2cd6ad8860d82e48d2f99de97904e525827a1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 1 Dec 2022 11:42:47 +0000 Subject: [PATCH 0933/1477] ANDROID: abi preservation for fscrypt change in 5.10.154 commit 391cceee6d43 ("fscrypt: stop using keyrings subsystem for fscrypt_master_key") changed the pointer types of 2 pointers. 
These are private pointers to the fscrypt code, which will not matter to any GKI users, but the types change so the CRC needs to be preserved and the .xml file needs to be updates to reflect the type changes that happened. Leaf changes summary: 2 artifacts changed Changed leaf types summary: 2 leaf types changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 0 Added function Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 0 Added variable 'struct fscrypt_info at fscrypt_private.h:195:1' changed: type size hasn't changed there are data member changes: type 'key*' of 'fscrypt_info::ci_master_key' changed: pointer type changed from: 'key*' to: 'fscrypt_master_key*' 2694 impacted interfaces 'struct super_block at fs.h:1450:1' changed: type size hasn't changed there are data member changes: type 'key*' of 'super_block::s_master_keys' changed: pointer type changed from: 'key*' to: 'fscrypt_keyring*' 2694 impacted interfaces Bug: 161946584 Cc: Eric Biggers Fixes: 391cceee6d43 ("fscrypt: stop using keyrings subsystem for fscrypt_master_key") Signed-off-by: Greg Kroah-Hartman Change-Id: Ia8bec0c8879e7b6bd4e19baf59f36a22d11f4b9b --- android/abi_gki_aarch64.xml | 1997 ++++++++++++++++++----------------- fs/crypto/fscrypt_private.h | 9 + include/linux/fs.h | 9 + 3 files changed, 1065 insertions(+), 950 deletions(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index c6377fd04f7a..6f4440e64311 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -7157,21 +7157,21 @@ - + - + - + - + - + - + @@ -8297,6 +8297,11 @@ + + + + + @@ -9642,30 +9647,30 @@ - + - + - + - + - + - + - + - + - + @@ -10592,12 +10597,12 @@ - + - + - + @@ -12539,9 +12544,9 @@ - + - + @@ -13491,18 +13496,18 @@ - + - + - + - + - + @@ -16726,24 +16731,24 @@ - + - + - + - + - + - + - + @@ -17760,15 +17765,15 @@ - + - + - + - + @@ -18187,12 +18192,12 @@ - + - + - + @@ -18751,42 +18756,42 @@ - + - + - + - + - + - + - + - + - + - + - + - + 
- + @@ -19240,24 +19245,24 @@ - + - + - + - + - + - + - + @@ -21416,7 +21421,7 @@ - + @@ -22139,7 +22144,7 @@ - + @@ -23076,12 +23081,12 @@ - + - + - + @@ -25114,66 +25119,66 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -25242,12 +25247,12 @@ - + - + - + @@ -26754,6 +26759,17 @@ + + + + + + + + + + + @@ -27197,6 +27213,9 @@ + + + @@ -28327,6 +28346,7 @@ + @@ -30033,12 +30053,12 @@ - + - + - + @@ -30478,9 +30498,9 @@ - + - + @@ -31303,6 +31323,20 @@ + + + + + + + + + + + + + + @@ -31395,30 +31429,30 @@ - + - + - + - + - + - + - + - + - + @@ -32742,15 +32776,15 @@ - + - + - + - + @@ -33131,24 +33165,24 @@ - + - + - + - + - + - + - + @@ -38711,12 +38745,12 @@ - + - + - + @@ -40403,30 +40437,30 @@ - + - + - + - + - + - + - + - + - + @@ -41315,21 +41349,21 @@ - + - + - + - + - + - + @@ -42450,21 +42484,21 @@ - + - + - + - + - + - + @@ -44315,81 +44349,81 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -45770,9 +45804,9 @@ - + - + @@ -46439,21 +46473,21 @@ - + - + - + - + - + - + @@ -48106,7 +48140,7 @@ - + @@ -48905,15 +48939,15 @@ - + - + - + - + @@ -51049,21 +51083,21 @@ - + - + - + - + - + - + @@ -53785,18 +53819,18 @@ - + - + - + - + - + @@ -53850,15 +53884,15 @@ - + - + - + - + @@ -55911,12 +55945,12 @@ - + - + - + @@ -56268,30 +56302,30 @@ - + - + - + - + - + - + - + - + - + @@ -56874,114 +56908,114 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -58431,27 +58465,27 @@ - + - + - + - + - + - + - + - + @@ -58960,36 +58994,36 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -61003,96 +61037,96 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -66281,45 +66315,45 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -66817,28 +66851,28 @@ - + - + - + - + - + - + - + - + 
@@ -67590,27 +67624,27 @@ - + - + - + - + - + - + - + - + @@ -67657,9 +67691,9 @@ - + - + @@ -68011,30 +68045,30 @@ - + - + - + - + - + - + - + - + - + @@ -68111,18 +68145,18 @@ - + - + - + - + - + @@ -68554,12 +68588,12 @@ - + - + - + @@ -68819,18 +68853,18 @@ - + - + - + - + - + @@ -68848,6 +68882,56 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -68928,12 +69012,12 @@ - + - + - + @@ -71663,15 +71747,15 @@ - + - + - + - + @@ -71976,18 +72060,18 @@ - + - + - + - + - + @@ -72890,18 +72974,18 @@ - + - + - + - + - + @@ -73377,15 +73461,15 @@ - + - + - + - + @@ -74159,30 +74243,30 @@ - + - + - + - + - + - + - + - + - + @@ -77641,30 +77725,30 @@ - + - + - + - + - + - + - + - + - + @@ -78552,9 +78636,9 @@ - + - + @@ -78568,6 +78652,17 @@ + + + + + + + + + + + @@ -79183,121 +79278,121 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -79336,21 +79431,21 @@ - + - + - + - + - + - + @@ -81737,18 +81832,18 @@ - + - + - + - + - + @@ -82774,73 +82869,73 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -84077,151 +84172,151 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -87599,39 +87694,40 @@ + - + - + - + - + - + - + - + - + - + - + @@ -92262,9 +92358,9 @@ - + - + @@ -92965,18 +93061,18 @@ - + - + - + - + - + @@ -93523,7 +93619,7 @@ - + @@ -96065,12 +96161,12 @@ - + - + - + @@ -96862,12 +96958,12 @@ - + - + - + @@ -97164,12 +97260,12 @@ - + - + - + @@ -100089,6 +100185,7 @@ + @@ -100275,15 +100372,15 @@ - + - + - + - + @@ -100675,12 +100772,12 @@ - + - + - + @@ -100855,42 +100952,42 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -101291,9 +101388,9 @@ - + - + @@ -102311,33 +102408,33 @@ - + - + - + 
- + - + - + - + - + - + - + @@ -104688,33 +104785,33 @@ - + - + - + - + - + - + - + - + - + - + @@ -104821,18 +104918,18 @@ - + - + - + - + - + @@ -107755,30 +107852,30 @@ - + - + - + - + - + - + - + - + - + @@ -109809,33 +109906,33 @@ - + - + - + - + - + - + - + - + - + - + @@ -109888,9 +109985,9 @@ - + - + @@ -112596,15 +112693,15 @@ - + - + - + - + @@ -114158,39 +114255,39 @@ - + - + - + - + - + - + - + - + - + - + - + - + @@ -116245,44 +116342,44 @@ - - - - - + + + + + - - - - + + + + - - - - + + + + - - + + - - - - - - + + + + + + - - - + + + @@ -116932,8 +117029,8 @@ - - + + @@ -120843,12 +120940,12 @@ - - - - - - + + + + + + @@ -121157,8 +121254,8 @@ - - + + @@ -123250,8 +123347,8 @@ - - + + @@ -123260,8 +123357,8 @@ - - + + @@ -129572,10 +129669,10 @@ - - - - + + + + @@ -129943,11 +130040,11 @@ - - - - - + + + + + @@ -130174,10 +130271,10 @@ - - - - + + + + @@ -130261,8 +130358,8 @@ - - + + @@ -130309,8 +130406,8 @@ - - + + @@ -130320,9 +130417,9 @@ - - - + + + @@ -130351,15 +130448,15 @@ - - + + - - - - - + + + + + @@ -130367,14 +130464,14 @@ - - - + + + - - - + + + @@ -130392,14 +130489,14 @@ - - - + + + - - - + + + @@ -130419,22 +130516,22 @@ - - - - + + + + - - - - - - + + + + + + - - + + @@ -130449,10 +130546,10 @@ - - - - + + + + @@ -130462,15 +130559,15 @@ - - - - + + + + - - - + + + @@ -131047,12 +131144,12 @@ - - + + - - + + @@ -131136,9 +131233,9 @@ - - - + + + @@ -133817,22 +133914,22 @@ - - + + - - + + - - - - + + + + - - + + @@ -135591,12 +135688,12 @@ - - - - - - + + + + + + @@ -139660,12 +139757,12 @@ - - + + - - + + @@ -140123,12 +140220,12 @@ - - - - - - + + + + + + @@ -141110,16 +141207,16 @@ - - + + - - + + - - + + @@ -141618,45 +141715,45 @@ - - - - + + + + - - - - - + + + + + - - - + + + - - - - + + + + - - + + - - + + - - - + + + @@ -141683,9 +141780,9 @@ - - - + + + @@ -141904,10 +142001,10 @@ - - - - + + + + @@ -144498,12 +144595,12 @@ - - - - - - + + + + + + @@ -144608,8 +144705,8 @@ - - + + @@ -145507,21 +145604,21 @@ - - - + + + - 
- - + + + - - - - - + + + + + @@ -145537,71 +145634,71 @@ - - + + - - - + + + - - - + + + - - - - - - + + + + + + - - - - - - + + + + + + - - + + - - - + + + - - - + + + - - - - - - - + + + + + + + - - - + + + - - - + + + - - + + @@ -145610,8 +145707,8 @@ - - + + @@ -145892,11 +145989,11 @@ - - - - - + + + + + @@ -145991,12 +146088,12 @@ - - + + - - + + @@ -147249,11 +147346,11 @@ - - - - - + + + + + @@ -147533,11 +147630,11 @@ - - - - - + + + + + @@ -147554,11 +147651,11 @@ - - - - - + + + + + @@ -148252,22 +148349,22 @@ - - - - + + + + - - - - + + + + - - - - + + + + @@ -148338,23 +148435,23 @@ - - - - + + + + - - - - + + + + - - - - - + + + + + @@ -148792,28 +148889,28 @@ - - - - + + + + - - - + + + - - - - + + + + - - - - - + + + + + @@ -148824,38 +148921,38 @@ - - - + + + - - + + - - - - + + + + - - - + + + - - - + + + - - - + + + - - + + @@ -148873,9 +148970,9 @@ - - - + + + @@ -148898,14 +148995,14 @@ - - - + + + - - - + + + @@ -148935,9 +149032,9 @@ - - - + + + @@ -148950,8 +149047,8 @@ - - + + diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 4f88ff06bce2..f5f056739cb6 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -222,7 +222,16 @@ struct fscrypt_info { * will be NULL if the master key was found in a process-subscribed * keyring rather than in the filesystem-level keyring. */ +#ifdef __GENKSYMS__ + /* + * Android ABI CRC preservation due to commit 391cceee6d43 ("fscrypt: + * stop using keyrings subsystem for fscrypt_master_key") changing this + * type. Size is the same, this is a private field. + */ + struct key *ci_master_key; +#else struct fscrypt_master_key *ci_master_key; +#endif /* * Link in list of inodes that were unlocked with the master key. 
diff --git a/include/linux/fs.h b/include/linux/fs.h index dcf1c720e26c..79f9fa17f7f8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1471,8 +1471,17 @@ struct super_block { const struct xattr_handler **s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; +#ifdef __GENKSYMS__ + /* + * Android ABI CRC preservation due to commit 391cceee6d43 ("fscrypt: + * stop using keyrings subsystem for fscrypt_master_key") changing this + * type. Size is the same, this is a private field. + */ + struct key *s_master_keys; /* master crypto keys in use */ +#else struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ #endif +#endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; #endif From 03aabcb88aeeb7221ddb6196ae84ad5fb17b743f Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 7 Nov 2022 20:48:28 +0800 Subject: [PATCH 0934/1477] scsi: scsi_transport_sas: Fix error handling in sas_phy_add() [ Upstream commit 5d7bebf2dfb0dc97aac1fbace0910e557ecdb16f ] If transport_add_device() fails in sas_phy_add(), the kernel will crash trying to delete the device in transport_remove_device() called from sas_remove_host(). 
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000108 CPU: 61 PID: 42829 Comm: rmmod Kdump: loaded Tainted: G W 6.1.0-rc1+ #173 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : device_del+0x54/0x3d0 lr : device_del+0x37c/0x3d0 Call trace: device_del+0x54/0x3d0 attribute_container_class_device_del+0x28/0x38 transport_remove_classdev+0x6c/0x80 attribute_container_device_trigger+0x108/0x110 transport_remove_device+0x28/0x38 sas_phy_delete+0x30/0x60 [scsi_transport_sas] do_sas_phy_delete+0x6c/0x80 [scsi_transport_sas] device_for_each_child+0x68/0xb0 sas_remove_children+0x40/0x50 [scsi_transport_sas] sas_remove_host+0x20/0x38 [scsi_transport_sas] hisi_sas_remove+0x40/0x68 [hisi_sas_main] hisi_sas_v2_remove+0x20/0x30 [hisi_sas_v2_hw] platform_remove+0x2c/0x60 Fix this by checking and handling return value of transport_add_device() in sas_phy_add(). Fixes: c7ebbbce366c ("[SCSI] SAS transport class") Suggested-by: John Garry Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221107124828.115557-1-yangyingliang@huawei.com Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_transport_sas.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 4a96fb05731d..c6256fdc24b1 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -716,12 +716,17 @@ int sas_phy_add(struct sas_phy *phy) int error; error = device_add(&phy->dev); - if (!error) { - transport_add_device(&phy->dev); - transport_configure_device(&phy->dev); - } + if (error) + return error; - return error; + error = transport_add_device(&phy->dev); + if (error) { + device_del(&phy->dev); + return error; + } + transport_configure_device(&phy->dev); + + return 0; } EXPORT_SYMBOL(sas_phy_add); From 4034d06a4dbecab7030a88b735a131357b2dac58 Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Wed, 5 Jan 2022 19:13:54 -0500 Subject: [PATCH 0935/1477] ata: libata-scsi: simplify __ata_scsi_queuecmd() [ Upstream commit 84eac327af543f03172085d5ef9f98ea25a51191 ] This patch cleans up the code of __ata_scsi_queuecmd(). Since each branch of the "if" condition checks that scmd->cmd_len is not zero, move this check out of the "if" to simplify the conditions being checked in the "else" branch. While at it, flatten the nested if-else structure into an if / else if / else chain and remove the redundant rc local variable. This patch does not change the function logic.
Signed-off-by: Wenchao Hao Signed-off-by: Damien Le Moal Stable-dep-of: e20e81a24a4d ("ata: libata-core: do not issue non-internal commands once EH is pending") Signed-off-by: Sasha Levin --- drivers/ata/libata-scsi.c | 45 ++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 459ece666c62..70744439359d 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4034,42 +4034,39 @@ int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev) { u8 scsi_op = scmd->cmnd[0]; ata_xlat_func_t xlat_func; - int rc = 0; + + if (unlikely(!scmd->cmd_len)) + goto bad_cdb_len; if (dev->class == ATA_DEV_ATA || dev->class == ATA_DEV_ZAC) { - if (unlikely(!scmd->cmd_len || scmd->cmd_len > dev->cdb_len)) + if (unlikely(scmd->cmd_len > dev->cdb_len)) goto bad_cdb_len; xlat_func = ata_get_xlat_func(dev, scsi_op); - } else { - if (unlikely(!scmd->cmd_len)) + } else if (likely((scsi_op != ATA_16) || !atapi_passthru16)) { + /* relay SCSI command to ATAPI device */ + int len = COMMAND_SIZE(scsi_op); + + if (unlikely(len > scmd->cmd_len || + len > dev->cdb_len || + scmd->cmd_len > ATAPI_CDB_LEN)) goto bad_cdb_len; - xlat_func = NULL; - if (likely((scsi_op != ATA_16) || !atapi_passthru16)) { - /* relay SCSI command to ATAPI device */ - int len = COMMAND_SIZE(scsi_op); - if (unlikely(len > scmd->cmd_len || - len > dev->cdb_len || - scmd->cmd_len > ATAPI_CDB_LEN)) - goto bad_cdb_len; + xlat_func = atapi_xlat; + } else { + /* ATA_16 passthru, treat as an ATA command */ + if (unlikely(scmd->cmd_len > 16)) + goto bad_cdb_len; - xlat_func = atapi_xlat; - } else { - /* ATA_16 passthru, treat as an ATA command */ - if (unlikely(scmd->cmd_len > 16)) - goto bad_cdb_len; - - xlat_func = ata_get_xlat_func(dev, scsi_op); - } + xlat_func = ata_get_xlat_func(dev, scsi_op); } if (xlat_func) - rc = ata_scsi_translate(dev, scmd, xlat_func); - else - ata_scsi_simulate(dev, 
scmd); + return ata_scsi_translate(dev, scmd, xlat_func); - return rc; + ata_scsi_simulate(dev, scmd); + + return 0; bad_cdb_len: DPRINTK("bad CDB len=%u, scsi_op=0x%02x, max=%u\n", From f5cbd86ebf284be5d175d6e8aba67956ef16c44f Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 9 Nov 2022 00:15:34 +0100 Subject: [PATCH 0936/1477] ata: libata-core: do not issue non-internal commands once EH is pending [ Upstream commit e20e81a24a4d58744a29715aac2f795cd1651955 ] While the ATA specification states that a device should return command aborted for all commands queued after the device has entered error state, since ATA only keeps the sense data for the latest command (in non-NCQ case), we really don't want to send block layer commands to the device after it has entered error state. (Only ATA EH commands should be sent, to read the sense data etc.) Currently, scsi_queue_rq() will check if scsi_host_in_recovery() (state is SHOST_RECOVERY), and if so, it will _not_ issue a command via: scsi_dispatch_cmd() -> host->hostt->queuecommand() (ata_scsi_queuecmd()) -> __ata_scsi_queuecmd() -> ata_scsi_translate() -> ata_qc_issue() Before commit e494f6a72839 ("[SCSI] improved eh timeout handler"), when receiving a TFES error IRQ, the call chain looked like this: ahci_error_intr() -> ata_port_abort() -> ata_do_link_abort() -> ata_qc_complete() -> ata_qc_schedule_eh() -> blk_abort_request() -> blk_rq_timed_out() -> q->rq_timed_out_fn() (scsi_times_out()) -> scsi_eh_scmd_add() -> scsi_host_set_state(shost, SHOST_RECOVERY) Which meant that as soon as an error IRQ was serviced, SHOST_RECOVERY would be set. However, after commit e494f6a72839 ("[SCSI] improved eh timeout handler"), scsi_times_out() will instead call scsi_abort_command() which will queue delayed work, and the worker function scmd_eh_abort_handler() will call scsi_eh_scmd_add(), which calls scsi_host_set_state(shost, SHOST_RECOVERY). 
So now, after the TFES error IRQ has been serviced, we need to wait for the SCSI workqueue to run its work before SHOST_RECOVERY gets set. It is worth noting that, even before commit e494f6a72839 ("[SCSI] improved eh timeout handler"), we could receive an error IRQ from the time when scsi_queue_rq() checks scsi_host_in_recovery(), to the time when ata_scsi_queuecmd() is actually called. In order to handle both the delayed setting of SHOST_RECOVERY and the window where we can receive an error IRQ, add a check against ATA_PFLAG_EH_PENDING (which gets set when servicing the error IRQ), inside ata_scsi_queuecmd() itself, while holding the ap->lock. (Since the ap->lock is held while servicing IRQs.) Fixes: e494f6a72839 ("[SCSI] improved eh timeout handler") Signed-off-by: Niklas Cassel Tested-by: John Garry Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/libata-scsi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 70744439359d..f1755efd30a2 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4032,9 +4032,19 @@ void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd) int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev) { + struct ata_port *ap = dev->link->ap; u8 scsi_op = scmd->cmnd[0]; ata_xlat_func_t xlat_func; + /* + * scsi_queue_rq() will defer commands if scsi_host_in_recovery(). + * However, this check is done without holding the ap->lock (a libata + * specific lock), so we can have received an error irq since then, + * therefore we must check if EH is pending, while holding ap->lock. 
+ */ + if (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) + return SCSI_MLQUEUE_DEVICE_BUSY; + if (unlikely(!scmd->cmd_len)) goto bad_cdb_len; From 89a7f155e6b2b7626fa1a978c7ec04a3a0b7ddef Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Sun, 29 Nov 2020 14:54:05 +0200 Subject: [PATCH 0937/1477] bridge: switchdev: Notify about VLAN protocol changes [ Upstream commit 22ec19f3aee327806c37c9fa1188741574bc6445 ] Drivers that support bridge offload need to be notified about changes to the bridge's VLAN protocol so that they could react accordingly and potentially veto the change. Add a new switchdev attribute to communicate the change to drivers. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Acked-by: Nikolay Aleksandrov Signed-off-by: Ido Schimmel Reviewed-by: Ivan Vecera Signed-off-by: Jakub Kicinski Stable-dep-of: 9d45921ee4cb ("bridge: switchdev: Fix memory leaks when changing VLAN protocol") Signed-off-by: Sasha Levin --- include/net/switchdev.h | 2 ++ net/bridge/br_vlan.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8528015590e4..afdf8bd1b4fe 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -38,6 +38,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_PORT_MROUTER, SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, + SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL, SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, #if IS_ENABLED(CONFIG_BRIDGE_MRP) @@ -57,6 +58,7 @@ struct switchdev_attr { bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ + u16 vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */ bool mc_disabled; /* MC_DISABLED */ #if IS_ENABLED(CONFIG_BRIDGE_MRP) u8 mrp_port_role; /* MRP_PORT_ROLE */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 852f4b54e881..7e5968e7aad5 100644 --- 
a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -855,15 +855,25 @@ EXPORT_SYMBOL_GPL(br_vlan_get_proto); int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) { + struct switchdev_attr attr = { + .orig_dev = br->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.vlan_protocol = ntohs(proto), + }; int err = 0; struct net_bridge_port *p; struct net_bridge_vlan *vlan; struct net_bridge_vlan_group *vg; - __be16 oldproto; + __be16 oldproto = br->vlan_proto; if (br->vlan_proto == proto) return 0; + err = switchdev_port_attr_set(br->dev, &attr); + if (err && err != -EOPNOTSUPP) + return err; + /* Add VLANs for the new proto to the device filter. */ list_for_each_entry(p, &br->port_list, list) { vg = nbp_vlan_group(p); @@ -874,7 +884,6 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) } } - oldproto = br->vlan_proto; br->vlan_proto = proto; recalculate_group_addr(br); @@ -890,6 +899,9 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) return 0; err_filt: + attr.u.vlan_protocol = ntohs(oldproto); + switchdev_port_attr_set(br->dev, &attr); + list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) vlan_vid_del(p->dev, proto, vlan->vid); From 347f1793b573466424c550f2748ed837b6690fe7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Nov 2022 10:45:09 +0200 Subject: [PATCH 0938/1477] bridge: switchdev: Fix memory leaks when changing VLAN protocol [ Upstream commit 9d45921ee4cb364910097e7d1b7558559c2f9fd2 ] The bridge driver can offload VLANs to the underlying hardware either via switchdev or the 8021q driver. When the former is used, the VLAN is marked in the bridge driver with the 'BR_VLFLAG_ADDED_BY_SWITCHDEV' private flag. To avoid the memory leaks mentioned in the cited commit, the bridge driver will try to delete a VLAN via the 8021q driver if the VLAN is not marked with the previously mentioned flag. 
When the VLAN protocol of the bridge changes, switchdev drivers are notified via the 'SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL' attribute, but the 8021q driver is also called to add the existing VLANs with the new protocol and delete them with the old protocol. In case the VLANs were offloaded via switchdev, the above behavior is both redundant and buggy. Redundant because the VLANs are already programmed in hardware and drivers that support VLAN protocol change (currently only mlx5) change the protocol upon the switchdev attribute notification. Buggy because the 8021q driver is called despite these VLANs being marked with 'BR_VLFLAG_ADDED_BY_SWITCHDEV'. This leads to memory leaks [1] when the VLANs are deleted. Fix by not calling the 8021q driver for VLANs that were already programmed via switchdev. [1] unreferenced object 0xffff8881f6771200 (size 256): comm "ip", pid 446855, jiffies 4298238841 (age 55.240s) hex dump (first 32 bytes): 00 00 7f 0e 83 88 ff ff 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
backtrace: [<00000000012819ac>] vlan_vid_add+0x437/0x750 [<00000000f2281fad>] __br_vlan_set_proto+0x289/0x920 [<000000000632b56f>] br_changelink+0x3d6/0x13f0 [<0000000089d25f04>] __rtnl_newlink+0x8ae/0x14c0 [<00000000f6276baf>] rtnl_newlink+0x5f/0x90 [<00000000746dc902>] rtnetlink_rcv_msg+0x336/0xa00 [<000000001c2241c0>] netlink_rcv_skb+0x11d/0x340 [<0000000010588814>] netlink_unicast+0x438/0x710 [<00000000e1a4cd5c>] netlink_sendmsg+0x788/0xc40 [<00000000e8992d4e>] sock_sendmsg+0xb0/0xe0 [<00000000621b8f91>] ____sys_sendmsg+0x4ff/0x6d0 [<000000000ea26996>] ___sys_sendmsg+0x12e/0x1b0 [<00000000684f7e25>] __sys_sendmsg+0xab/0x130 [<000000004538b104>] do_syscall_64+0x3d/0x90 [<0000000091ed9678>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 279737939a81 ("net: bridge: Fix VLANs memory leak") Reported-by: Vlad Buslov Tested-by: Vlad Buslov Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20221114084509.860831-1-idosch@nvidia.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/bridge/br_vlan.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 7e5968e7aad5..1dc5db07650c 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -878,6 +878,8 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) list_for_each_entry(p, &br->port_list, list) { vg = nbp_vlan_group(p); list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; err = vlan_vid_add(p->dev, proto, vlan->vid); if (err) goto err_filt; @@ -892,8 +894,11 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) /* Delete VLANs for the old proto from the device filter. 
*/ list_for_each_entry(p, &br->port_list, list) { vg = nbp_vlan_group(p); - list_for_each_entry(vlan, &vg->vlan_list, vlist) + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; vlan_vid_del(p->dev, oldproto, vlan->vid); + } } return 0; @@ -902,13 +907,19 @@ err_filt: attr.u.vlan_protocol = ntohs(oldproto); switchdev_port_attr_set(br->dev, &attr); - list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) + list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; vlan_vid_del(p->dev, proto, vlan->vid); + } list_for_each_entry_continue_reverse(p, &br->port_list, list) { vg = nbp_vlan_group(p); - list_for_each_entry(vlan, &vg->vlan_list, vlist) + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; vlan_vid_del(p->dev, proto, vlan->vid); + } } return err; From 4f0cea018e0326e53ddfc079fe6379b22099875c Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 6 Oct 2022 11:33:14 +0200 Subject: [PATCH 0939/1477] drm/display: Don't assume dual mode adaptors support i2c sub-addressing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5954acbacbd1946b96ce8ee799d309cb0cd3cb9d ] Current dual mode adaptor ("DP++") detection code assumes that all adaptors support i2c sub-addressing for read operations from the DP-HDMI adaptor ID buffer. It has been observed that multiple adaptors do not in fact support this, and always return data starting at register 0. On affected adaptors, the code fails to read the proper registers that would identify the device as a type 2 adaptor, and handles those as type 1, limiting the TMDS clock to 165MHz, even if the according register would announce a higher TMDS clock. 
Fix this by always reading the ID buffer starting from offset 0, and discarding any bytes before the actual offset of interest. We tried finding authoritative documentation on whether or not this is allowed behaviour, but since all the official VESA docs are paywalled, the best we could come up with was the spec sheet for Texas Instruments' SNx5DP149 chip family.[1] It explicitly mentions that sub-addressing is supported for register writes, but *not* for reads (See NOTE in section 8.5.3). Unless TI openly decided to violate the VESA spec, one could take that as a hint that sub-addressing is in fact not mandated by VESA. The other two adaptors affected used the PS8409(A) and the LT8611, according to the data returned from their ID buffers. [1] https://www.ti.com/lit/ds/symlink/sn75dp149.pdf Cc: stable@vger.kernel.org Signed-off-by: Simon Rettberg Reviewed-by: Rafael Gieschke Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221006113314.41101987@computer Acked-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_dual_mode_helper.c | 51 +++++++++++++---------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c index 1c9ea9f7fdaf..f2ff0bfdf54d 100644 --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c @@ -62,23 +62,45 @@ ssize_t drm_dp_dual_mode_read(struct i2c_adapter *adapter, u8 offset, void *buffer, size_t size) { + u8 zero = 0; + char *tmpbuf = NULL; + /* + * As sub-addressing is not supported by all adaptors, + * always explicitly read from the start and discard + * any bytes that come before the requested offset. + * This way, no matter whether the adaptor supports it + * or not, we'll end up reading the proper data. 
+ */ struct i2c_msg msgs[] = { { .addr = DP_DUAL_MODE_SLAVE_ADDRESS, .flags = 0, .len = 1, - .buf = &offset, + .buf = &zero, }, { .addr = DP_DUAL_MODE_SLAVE_ADDRESS, .flags = I2C_M_RD, - .len = size, + .len = size + offset, .buf = buffer, }, }; int ret; + if (offset) { + tmpbuf = kmalloc(size + offset, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + + msgs[1].buf = tmpbuf; + } + ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs)); + if (tmpbuf) + memcpy(buffer, tmpbuf + offset, size); + + kfree(tmpbuf); + if (ret < 0) return ret; if (ret != ARRAY_SIZE(msgs)) @@ -205,18 +227,6 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(struct i2c_adapter *adapter) if (ret) return DRM_DP_DUAL_MODE_UNKNOWN; - /* - * Sigh. Some (maybe all?) type 1 adaptors are broken and ack - * the offset but ignore it, and instead they just always return - * data from the start of the HDMI ID buffer. So for a broken - * type 1 HDMI adaptor a single byte read will always give us - * 0x44, and for a type 1 DVI adaptor it should give 0x00 - * (assuming it implements any registers). Fortunately neither - * of those values will match the type 2 signature of the - * DP_DUAL_MODE_ADAPTOR_ID register so we can proceed with - * the type 2 adaptor detection safely even in the presence - * of broken type 1 adaptors. - */ ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_ADAPTOR_ID, &adaptor_id, sizeof(adaptor_id)); DRM_DEBUG_KMS("DP dual mode adaptor ID: %02x (err %zd)\n", @@ -231,11 +241,10 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(struct i2c_adapter *adapter) return DRM_DP_DUAL_MODE_TYPE2_DVI; } /* - * If neither a proper type 1 ID nor a broken type 1 adaptor - * as described above, assume type 1, but let the user know - * that we may have misdetected the type. + * If not a proper type 1 ID, still assume type 1, but let + * the user know that we may have misdetected the type. 
*/ - if (!is_type1_adaptor(adaptor_id) && adaptor_id != hdmi_id[0]) + if (!is_type1_adaptor(adaptor_id)) DRM_ERROR("Unexpected DP dual mode adaptor ID %02x\n", adaptor_id); @@ -339,10 +348,8 @@ EXPORT_SYMBOL(drm_dp_dual_mode_get_tmds_output); * @enable: enable (as opposed to disable) the TMDS output buffers * * Set the state of the TMDS output buffers in the adaptor. For - * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. As - * some type 1 adaptors have problems with registers (see comments - * in drm_dp_dual_mode_detect()) we avoid touching the register, - * making this function a no-op on type 1 adaptors. + * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. + * Type1 adaptors do not support any register writes. * * Returns: * 0 on success, negative error code on failure From f4066fb910216e2dde9ccb23faaf79b49ffcc01b Mon Sep 17 00:00:00 2001 From: Leo Savernik Date: Wed, 22 Jun 2022 12:19:21 +0200 Subject: [PATCH 0940/1477] nvme: add a bogus subsystem NQN quirk for Micron MTFDKBA2T0TFH [ Upstream commit 41f38043f884c66af4114a7109cf540d6222f450 ] The Micron MTFDKBA2T0TFH device reports the same subsystem NQN for all devices. Add a quirk to ignore it.
Signed-off-by: Leo Savernik Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig Stable-dep-of: d5ceb4d1c507 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for Micron Nitro") Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 65f4bf880608..7e2ee636c5f9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3252,6 +3252,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1344, 0x5407), /* Micron Technology Inc NVMe SSD */ + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN }, { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ From 80c825e1e33b3903ab8b70a6bbbd9ba4402721cd Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Mon, 14 Nov 2022 14:48:52 +0100 Subject: [PATCH 0941/1477] nvme-pci: add NVME_QUIRK_BOGUS_NID for Micron Nitro [ Upstream commit d5ceb4d1c50786d21de3d4b06c3f43109ec56dd8 ] Added a quirk to fix Micron Nitro NVMe reporting duplicate NGUIDs. 
Cc: Signed-off-by: Bean Huo Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 7e2ee636c5f9..089f39103584 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3254,6 +3254,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1344, 0x5407), /* Micron Technology Inc NVMe SSD */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN }, + { PCI_DEVICE(0x1344, 0x6001), /* Micron Nitro NVMe */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ From 879139bc7afbb4f53aa6e268b3e7a30601d6ba9e Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 20 Oct 2021 16:21:10 +0200 Subject: [PATCH 0942/1477] iio: ms5611: Simplify IO callback parameters [ Upstream commit dc19fa63ad80a636fdbc1a02153d1ab140cb901f ] The ms5611 passes &indio_dev->dev as a parameter to all its IO callbacks only to directly cast the struct device back to struct iio_dev. And the struct iio_dev is then only used to get the drivers state struct. Simplify this a bit by passing the state struct directly. This makes it a bit easier to follow what the code is doing. 
Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20211020142110.7060-1-lars@metafoo.de Signed-off-by: Jonathan Cameron Stable-dep-of: 17f442e7e475 ("iio: pressure: ms5611: fixed value compensation bug") Signed-off-by: Sasha Levin --- drivers/iio/pressure/ms5611.h | 6 +++--- drivers/iio/pressure/ms5611_core.c | 7 +++---- drivers/iio/pressure/ms5611_i2c.c | 11 ++++------- drivers/iio/pressure/ms5611_spi.c | 17 +++++++---------- 4 files changed, 17 insertions(+), 24 deletions(-) diff --git a/drivers/iio/pressure/ms5611.h b/drivers/iio/pressure/ms5611.h index bc06271fa38b..345f3902e3e3 100644 --- a/drivers/iio/pressure/ms5611.h +++ b/drivers/iio/pressure/ms5611.h @@ -50,9 +50,9 @@ struct ms5611_state { const struct ms5611_osr *pressure_osr; const struct ms5611_osr *temp_osr; - int (*reset)(struct device *dev); - int (*read_prom_word)(struct device *dev, int index, u16 *word); - int (*read_adc_temp_and_pressure)(struct device *dev, + int (*reset)(struct ms5611_state *st); + int (*read_prom_word)(struct ms5611_state *st, int index, u16 *word); + int (*read_adc_temp_and_pressure)(struct ms5611_state *st, s32 *temp, s32 *pressure); struct ms5611_chip_info *chip_info; diff --git a/drivers/iio/pressure/ms5611_core.c b/drivers/iio/pressure/ms5611_core.c index 214b0d25f598..885ccb7914dc 100644 --- a/drivers/iio/pressure/ms5611_core.c +++ b/drivers/iio/pressure/ms5611_core.c @@ -85,8 +85,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev) struct ms5611_state *st = iio_priv(indio_dev); for (i = 0; i < MS5611_PROM_WORDS_NB; i++) { - ret = st->read_prom_word(&indio_dev->dev, - i, &st->chip_info->prom[i]); + ret = st->read_prom_word(st, i, &st->chip_info->prom[i]); if (ret < 0) { dev_err(&indio_dev->dev, "failed to read prom at %d\n", i); @@ -108,7 +107,7 @@ static int ms5611_read_temp_and_pressure(struct iio_dev *indio_dev, int ret; struct ms5611_state *st = iio_priv(indio_dev); - ret = st->read_adc_temp_and_pressure(&indio_dev->dev, temp, pressure); + ret 
= st->read_adc_temp_and_pressure(st, temp, pressure); if (ret < 0) { dev_err(&indio_dev->dev, "failed to read temperature and pressure\n"); @@ -196,7 +195,7 @@ static int ms5611_reset(struct iio_dev *indio_dev) int ret; struct ms5611_state *st = iio_priv(indio_dev); - ret = st->reset(&indio_dev->dev); + ret = st->reset(st); if (ret < 0) { dev_err(&indio_dev->dev, "failed to reset device\n"); return ret; diff --git a/drivers/iio/pressure/ms5611_i2c.c b/drivers/iio/pressure/ms5611_i2c.c index 7c04f730430c..cccc40f7df0b 100644 --- a/drivers/iio/pressure/ms5611_i2c.c +++ b/drivers/iio/pressure/ms5611_i2c.c @@ -20,17 +20,15 @@ #include "ms5611.h" -static int ms5611_i2c_reset(struct device *dev) +static int ms5611_i2c_reset(struct ms5611_state *st) { - struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); - return i2c_smbus_write_byte(st->client, MS5611_RESET); } -static int ms5611_i2c_read_prom_word(struct device *dev, int index, u16 *word) +static int ms5611_i2c_read_prom_word(struct ms5611_state *st, int index, + u16 *word) { int ret; - struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); ret = i2c_smbus_read_word_swapped(st->client, MS5611_READ_PROM_WORD + (index << 1)); @@ -57,11 +55,10 @@ static int ms5611_i2c_read_adc(struct ms5611_state *st, s32 *val) return 0; } -static int ms5611_i2c_read_adc_temp_and_pressure(struct device *dev, +static int ms5611_i2c_read_adc_temp_and_pressure(struct ms5611_state *st, s32 *temp, s32 *pressure) { int ret; - struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); const struct ms5611_osr *osr = st->temp_osr; ret = i2c_smbus_write_byte(st->client, osr->cmd); diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c index f7743ee3318f..3039fe8aa2a2 100644 --- a/drivers/iio/pressure/ms5611_spi.c +++ b/drivers/iio/pressure/ms5611_spi.c @@ -15,18 +15,17 @@ #include "ms5611.h" -static int ms5611_spi_reset(struct device *dev) +static int ms5611_spi_reset(struct ms5611_state *st) { u8 cmd = MS5611_RESET; 
- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); return spi_write_then_read(st->client, &cmd, 1, NULL, 0); } -static int ms5611_spi_read_prom_word(struct device *dev, int index, u16 *word) +static int ms5611_spi_read_prom_word(struct ms5611_state *st, int index, + u16 *word) { int ret; - struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); ret = spi_w8r16be(st->client, MS5611_READ_PROM_WORD + (index << 1)); if (ret < 0) @@ -37,11 +36,10 @@ static int ms5611_spi_read_prom_word(struct device *dev, int index, u16 *word) return 0; } -static int ms5611_spi_read_adc(struct device *dev, s32 *val) +static int ms5611_spi_read_adc(struct ms5611_state *st, s32 *val) { int ret; u8 buf[3] = { MS5611_READ_ADC }; - struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); ret = spi_write_then_read(st->client, buf, 1, buf, 3); if (ret < 0) @@ -52,11 +50,10 @@ static int ms5611_spi_read_adc(struct device *dev, s32 *val) return 0; } -static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev, +static int ms5611_spi_read_adc_temp_and_pressure(struct ms5611_state *st, s32 *temp, s32 *pressure) { int ret; - struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); const struct ms5611_osr *osr = st->temp_osr; /* @@ -68,7 +65,7 @@ static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev, return ret; usleep_range(osr->conv_usec, osr->conv_usec + (osr->conv_usec / 10UL)); - ret = ms5611_spi_read_adc(dev, temp); + ret = ms5611_spi_read_adc(st, temp); if (ret < 0) return ret; @@ -78,7 +75,7 @@ static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev, return ret; usleep_range(osr->conv_usec, osr->conv_usec + (osr->conv_usec / 10UL)); - return ms5611_spi_read_adc(dev, pressure); + return ms5611_spi_read_adc(st, pressure); } static int ms5611_spi_probe(struct spi_device *spi) From 49c71b68141edd9e938b20955052ec2a308ed8cd Mon Sep 17 00:00:00 2001 From: Mitja Spes Date: Fri, 21 Oct 2022 15:58:20 +0200 Subject: [PATCH 0943/1477] iio: pressure: ms5611: 
fixed value compensation bug [ Upstream commit 17f442e7e47579d3881fc4d47354eaef09302e6f ] When using multiple instances of this driver the compensation PROM was overwritten by the last initialized sensor. Now each sensor has own PROM storage. Signed-off-by: Mitja Spes Fixes: 9690d81a02dc ("iio: pressure: ms5611: add support for MS5607 temperature and pressure sensor") Link: https://lore.kernel.org/r/20221021135827.1444793-2-mitja@lxnav.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/pressure/ms5611.h | 12 +++---- drivers/iio/pressure/ms5611_core.c | 51 ++++++++++++++++-------------- 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/drivers/iio/pressure/ms5611.h b/drivers/iio/pressure/ms5611.h index 345f3902e3e3..5e2d2d4d87b5 100644 --- a/drivers/iio/pressure/ms5611.h +++ b/drivers/iio/pressure/ms5611.h @@ -25,13 +25,6 @@ enum { MS5607, }; -struct ms5611_chip_info { - u16 prom[MS5611_PROM_WORDS_NB]; - - int (*temp_and_pressure_compensate)(struct ms5611_chip_info *chip_info, - s32 *temp, s32 *pressure); -}; - /* * OverSampling Rate descriptor. 
* Warning: cmd MUST be kept aligned on a word boundary (see @@ -50,12 +43,15 @@ struct ms5611_state { const struct ms5611_osr *pressure_osr; const struct ms5611_osr *temp_osr; + u16 prom[MS5611_PROM_WORDS_NB]; + int (*reset)(struct ms5611_state *st); int (*read_prom_word)(struct ms5611_state *st, int index, u16 *word); int (*read_adc_temp_and_pressure)(struct ms5611_state *st, s32 *temp, s32 *pressure); - struct ms5611_chip_info *chip_info; + int (*compensate_temp_and_pressure)(struct ms5611_state *st, s32 *temp, + s32 *pressure); struct regulator *vdd; }; diff --git a/drivers/iio/pressure/ms5611_core.c b/drivers/iio/pressure/ms5611_core.c index 885ccb7914dc..874a73b3ea9d 100644 --- a/drivers/iio/pressure/ms5611_core.c +++ b/drivers/iio/pressure/ms5611_core.c @@ -85,7 +85,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev) struct ms5611_state *st = iio_priv(indio_dev); for (i = 0; i < MS5611_PROM_WORDS_NB; i++) { - ret = st->read_prom_word(st, i, &st->chip_info->prom[i]); + ret = st->read_prom_word(st, i, &st->prom[i]); if (ret < 0) { dev_err(&indio_dev->dev, "failed to read prom at %d\n", i); @@ -93,7 +93,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev) } } - if (!ms5611_prom_is_valid(st->chip_info->prom, MS5611_PROM_WORDS_NB)) { + if (!ms5611_prom_is_valid(st->prom, MS5611_PROM_WORDS_NB)) { dev_err(&indio_dev->dev, "PROM integrity check failed\n"); return -ENODEV; } @@ -114,21 +114,20 @@ static int ms5611_read_temp_and_pressure(struct iio_dev *indio_dev, return ret; } - return st->chip_info->temp_and_pressure_compensate(st->chip_info, - temp, pressure); + return st->compensate_temp_and_pressure(st, temp, pressure); } -static int ms5611_temp_and_pressure_compensate(struct ms5611_chip_info *chip_info, +static int ms5611_temp_and_pressure_compensate(struct ms5611_state *st, s32 *temp, s32 *pressure) { s32 t = *temp, p = *pressure; s64 off, sens, dt; - dt = t - (chip_info->prom[5] << 8); - off = ((s64)chip_info->prom[2] << 16) + 
((chip_info->prom[4] * dt) >> 7); - sens = ((s64)chip_info->prom[1] << 15) + ((chip_info->prom[3] * dt) >> 8); + dt = t - (st->prom[5] << 8); + off = ((s64)st->prom[2] << 16) + ((st->prom[4] * dt) >> 7); + sens = ((s64)st->prom[1] << 15) + ((st->prom[3] * dt) >> 8); - t = 2000 + ((chip_info->prom[6] * dt) >> 23); + t = 2000 + ((st->prom[6] * dt) >> 23); if (t < 2000) { s64 off2, sens2, t2; @@ -154,17 +153,17 @@ static int ms5611_temp_and_pressure_compensate(struct ms5611_chip_info *chip_inf return 0; } -static int ms5607_temp_and_pressure_compensate(struct ms5611_chip_info *chip_info, +static int ms5607_temp_and_pressure_compensate(struct ms5611_state *st, s32 *temp, s32 *pressure) { s32 t = *temp, p = *pressure; s64 off, sens, dt; - dt = t - (chip_info->prom[5] << 8); - off = ((s64)chip_info->prom[2] << 17) + ((chip_info->prom[4] * dt) >> 6); - sens = ((s64)chip_info->prom[1] << 16) + ((chip_info->prom[3] * dt) >> 7); + dt = t - (st->prom[5] << 8); + off = ((s64)st->prom[2] << 17) + ((st->prom[4] * dt) >> 6); + sens = ((s64)st->prom[1] << 16) + ((st->prom[3] * dt) >> 7); - t = 2000 + ((chip_info->prom[6] * dt) >> 23); + t = 2000 + ((st->prom[6] * dt) >> 23); if (t < 2000) { s64 off2, sens2, t2, tmp; @@ -342,15 +341,6 @@ static int ms5611_write_raw(struct iio_dev *indio_dev, static const unsigned long ms5611_scan_masks[] = {0x3, 0}; -static struct ms5611_chip_info chip_info_tbl[] = { - [MS5611] = { - .temp_and_pressure_compensate = ms5611_temp_and_pressure_compensate, - }, - [MS5607] = { - .temp_and_pressure_compensate = ms5607_temp_and_pressure_compensate, - } -}; - static const struct iio_chan_spec ms5611_channels[] = { { .type = IIO_PRESSURE, @@ -433,7 +423,20 @@ int ms5611_probe(struct iio_dev *indio_dev, struct device *dev, struct ms5611_state *st = iio_priv(indio_dev); mutex_init(&st->lock); - st->chip_info = &chip_info_tbl[type]; + + switch (type) { + case MS5611: + st->compensate_temp_and_pressure = + ms5611_temp_and_pressure_compensate; + break; + case 
MS5607: + st->compensate_temp_and_pressure = + ms5607_temp_and_pressure_compensate; + break; + default: + return -EINVAL; + } + st->temp_osr = &ms5611_avail_temp_osr[ARRAY_SIZE(ms5611_avail_temp_osr) - 1]; st->pressure_osr = From d43219bb33d597075eb36984e6fe263b41a40f11 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Sat, 19 Feb 2022 14:28:33 +0800 Subject: [PATCH 0944/1477] ceph: do not update snapshot context when there is no new snapshot [ Upstream commit 2e586641c950e7f3e7e008404bd783a466b9b590 ] We will only track the uppest parent snapshot realm from which we need to rebuild the snapshot contexts _downward_ in hierarchy. For all the others having no new snapshot we will do nothing. This fix will avoid calling ceph_queue_cap_snap() on some inodes inappropriately. For example, with the code in mainline, suppose there are 2 directory hierarchies (with 6 directories total), like this: /dir_X1/dir_X2/dir_X3/ /dir_Y1/dir_Y2/dir_Y3/ Firstly, make a snapshot under /dir_X1/dir_X2/.snap/snap_X2, then make a root snapshot under /.snap/root_snap. Every time we make snapshots under /dir_Y1/..., the kclient will always try to rebuild the snap context for snap_X2 realm and finally will always try to queue cap snaps for dir_Y2 and dir_Y3, which makes no sense. That's because the snap_X2's seq is 2 and root_snap's seq is 3. So when creating a new snapshot under /dir_Y1/... the new seq will be 4, and the mds will send the kclient a snapshot backtrace in _downward_ order: seqs 4, 3. When ceph_update_snap_trace() is called, it will always rebuild the from the last realm, that's the root_snap. So later when rebuilding the snap context, the current logic will always cause it to rebuild the snap_X2 realm and then try to queue cap snaps for all the inodes related in that realm, even though it's not necessary. This is accompanied by a lot of these sorts of dout messages: "ceph: queue_cap_snap 00000000a42b796b nothing dirty|writing" Fix the logic to avoid this situation. 
Also, the 'invalidate' word is not precise here. In actuality, it will cause a rebuild of the existing snapshot contexts or just build non-existent ones. Rename it to 'rebuild_snapcs'. URL: https://tracker.ceph.com/issues/44100 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Stable-dep-of: 51884d153f7e ("ceph: avoid putting the realm twice when decoding snaps fails") Signed-off-by: Sasha Levin --- fs/ceph/snap.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 0369f672a76f..e779f0e2bdb8 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -699,7 +699,8 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, __le64 *prior_parent_snaps; /* encoded */ struct ceph_snap_realm *realm = NULL; struct ceph_snap_realm *first_realm = NULL; - int invalidate = 0; + struct ceph_snap_realm *realm_to_rebuild = NULL; + int rebuild_snapcs; int err = -ENOMEM; LIST_HEAD(dirty_realms); @@ -707,6 +708,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, dout("update_snap_trace deletion=%d\n", deletion); more: + rebuild_snapcs = 0; ceph_decode_need(&p, e, sizeof(*ri), bad); ri = p; p += sizeof(*ri); @@ -730,7 +732,7 @@ more: err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); if (err < 0) goto fail; - invalidate += err; + rebuild_snapcs += err; if (le64_to_cpu(ri->seq) > realm->seq) { dout("update_snap_trace updating %llx %p %lld -> %lld\n", @@ -755,22 +757,30 @@ more: if (realm->seq > mdsc->last_snap_seq) mdsc->last_snap_seq = realm->seq; - invalidate = 1; + rebuild_snapcs = 1; } else if (!realm->cached_context) { dout("update_snap_trace %llx %p seq %lld new\n", realm->ino, realm, realm->seq); - invalidate = 1; + rebuild_snapcs = 1; } else { dout("update_snap_trace %llx %p seq %lld unchanged\n", realm->ino, realm, realm->seq); } - dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, - realm, invalidate, p, e); + dout("done with %llx 
%p, rebuild_snapcs=%d, %p %p\n", realm->ino, + realm, rebuild_snapcs, p, e); - /* invalidate when we reach the _end_ (root) of the trace */ - if (invalidate && p >= e) - rebuild_snap_realms(realm, &dirty_realms); + /* + * this will always track the uppest parent realm from which + * we need to rebuild the snapshot contexts _downward_ in + * hierarchy. + */ + if (rebuild_snapcs) + realm_to_rebuild = realm; + + /* rebuild_snapcs when we reach the _end_ (root) of the trace */ + if (realm_to_rebuild && p >= e) + rebuild_snap_realms(realm_to_rebuild, &dirty_realms); if (!first_realm) first_realm = realm; From 044bc6d3c2c0e9090b0841e7b723875756534b45 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 9 Nov 2022 11:00:39 +0800 Subject: [PATCH 0945/1477] ceph: avoid putting the realm twice when decoding snaps fails [ Upstream commit 51884d153f7ec85e18d607b2467820a90e0f4359 ] When decoding the snaps fails, it may leave 'first_realm' and 'realm' pointing to the same snaprealm memory. The realm is then put twice, which could cause random use-after-free, BUG_ON, etc. issues.
Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/57686 Signed-off-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/snap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index e779f0e2bdb8..734873be56a7 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -697,7 +697,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, struct ceph_mds_snap_realm *ri; /* encoded */ __le64 *snaps; /* encoded */ __le64 *prior_parent_snaps; /* encoded */ - struct ceph_snap_realm *realm = NULL; + struct ceph_snap_realm *realm; struct ceph_snap_realm *first_realm = NULL; struct ceph_snap_realm *realm_to_rebuild = NULL; int rebuild_snapcs; @@ -708,6 +708,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, dout("update_snap_trace deletion=%d\n", deletion); more: + realm = NULL; rebuild_snapcs = 0; ceph_decode_need(&p, e, sizeof(*ri), bad); ri = p; From 2fcc593b5047183a2b0ed4e4c41ec1e911874911 Mon Sep 17 00:00:00 2001 From: taozhang Date: Sat, 15 Oct 2022 17:38:31 +0800 Subject: [PATCH 0946/1477] wifi: mac80211: fix memory free error when registering wiphy fail [ Upstream commit 50b2e8711462409cd368c41067405aa446dfa2af ] ieee80211_register_hw() frees the allocated cipher suites when registering the wiphy fails, and ieee80211_free_hw() then frees them again. Set wiphy_ciphers_allocated to false after freeing the allocated cipher suites.
Signed-off-by: taozhang Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 73893025922f..ae90ac3be59a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1349,8 +1349,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_led_exit(local); destroy_workqueue(local->workqueue); fail_workqueue: - if (local->wiphy_ciphers_allocated) + if (local->wiphy_ciphers_allocated) { kfree(local->hw.wiphy->cipher_suites); + local->wiphy_ciphers_allocated = false; + } kfree(local->int_scan_req); return result; } @@ -1420,8 +1422,10 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) mutex_destroy(&local->iflist_mtx); mutex_destroy(&local->mtx); - if (local->wiphy_ciphers_allocated) + if (local->wiphy_ciphers_allocated) { kfree(local->hw.wiphy->cipher_suites); + local->wiphy_ciphers_allocated = false; + } idr_for_each(&local->ack_status_frames, ieee80211_free_ack_frame, NULL); From 92ae6facd12981253c16eb9f2773472c0052f0a2 Mon Sep 17 00:00:00 2001 From: Jonas Jelonek Date: Fri, 14 Oct 2022 16:54:39 +0200 Subject: [PATCH 0947/1477] wifi: mac80211_hwsim: fix debugfs attribute ps with rc table support [ Upstream commit 69188df5f6e4cecc6b76b958979ba363cd5240e8 ] Fixes a warning that occurs when rc table support is enabled (IEEE80211_HW_SUPPORTS_RC_TABLE) in mac80211_hwsim and the PS mode is changed via the exported debugfs attribute. When the PS mode is changed, a packet is broadcasted via hwsim_send_nullfunc by creating and transmitting a plain skb with only header initialized. The ieee80211 rate array in the control buffer is zero-initialized. When ratetbl support is enabled, ieee80211_get_tx_rates is called for the skb with sta parameter set to NULL and thus no ratetbl can be used. The final rate array then looks like [-1,0; 0,0; 0,0; 0,0] which causes the warning in ieee80211_get_tx_rate. 
The issue is fixed by setting the count of the first rate with idx '0' to 1 and hence ieee80211_get_tx_rates won't overwrite it with idx '-1'. Signed-off-by: Jonas Jelonek Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/mac80211_hwsim.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index a6d4ff4760ad..255286b2324e 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -775,6 +775,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, struct hwsim_vif_priv *vp = (void *)vif->drv_priv; struct sk_buff *skb; struct ieee80211_hdr *hdr; + struct ieee80211_tx_info *cb; if (!vp->assoc) return; @@ -796,6 +797,10 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, memcpy(hdr->addr2, mac, ETH_ALEN); memcpy(hdr->addr3, vp->bssid, ETH_ALEN); + cb = IEEE80211_SKB_CB(skb); + cb->control.rates[0].count = 1; + cb->control.rates[1].idx = -1; + rcu_read_lock(); mac80211_hwsim_tx_frame(data->hw, skb, rcu_dereference(vif->chanctx_conf)->def.chan); From 1c9eb641d13e8dd8c301ead9c395dcc1104c419d Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Wed, 12 Oct 2022 13:09:28 +0200 Subject: [PATCH 0948/1477] riscv: dts: sifive unleashed: Add PWM controlled LEDs [ Upstream commit 8bc8824d30193eb7755043d5bb65fa7f0d11a595 ] This adds the 4 PWM controlled green LEDs to the HiFive Unleashed device tree. The schematic doesn't specify any special function for the LEDs, so they're added here without any default triggers and named d1, d2, d3 and d4 just like in the schematic. 
Signed-off-by: Emil Renner Berthing Reviewed-by: Conor Dooley Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20221012110928.352910-1-emil.renner.berthing@canonical.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- .../boot/dts/sifive/hifive-unleashed-a00.dts | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 60846e88ae4b..dddabfbbc7a9 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -3,6 +3,8 @@ #include "fu540-c000.dtsi" #include +#include +#include /* Clock frequency (in Hz) of the PCB crystal for rtcclk */ #define RTCCLK_FREQ 1000000 @@ -46,6 +48,42 @@ compatible = "gpio-restart"; gpios = <&gpio 10 GPIO_ACTIVE_LOW>; }; + + led-controller { + compatible = "pwm-leds"; + + led-d1 { + pwms = <&pwm0 0 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = ; + max-brightness = <255>; + label = "d1"; + }; + + led-d2 { + pwms = <&pwm0 1 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = ; + max-brightness = <255>; + label = "d2"; + }; + + led-d3 { + pwms = <&pwm0 2 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = ; + max-brightness = <255>; + label = "d3"; + }; + + led-d4 { + pwms = <&pwm0 3 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = ; + max-brightness = <255>; + label = "d4"; + }; + }; }; &uart0 { From 8552e6048ec9e7c904454eb35ccbd1f8572b8e7e Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 10:10:21 +0800 Subject: [PATCH 0949/1477] audit: fix undefined behavior in bit shift for AUDIT_BIT [ Upstream commit 986d93f55bdeab1cac858d1e47b41fac10b2d7f6 ] Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. 
The UBSAN warning calltrace like below: UBSAN: shift-out-of-bounds in kernel/auditfilter.c:179:23 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c audit_register_class+0x9d/0x137 audit_classes_init+0x4d/0xb8 do_one_initcall+0x76/0x430 kernel_init_freeable+0x3b3/0x422 kernel_init+0x24/0x1e0 ret_from_fork+0x1f/0x30 Signed-off-by: Gaosheng Cui [PM: remove bad 'Fixes' tag as issue predates git, added in v2.6.6-rc1] Signed-off-by: Paul Moore Signed-off-by: Sasha Levin --- include/uapi/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index cd2d8279a5e4..cb4e8e6e86a9 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -182,7 +182,7 @@ #define AUDIT_MAX_KEY_LEN 256 #define AUDIT_BITMASK_SIZE 64 #define AUDIT_WORD(nr) ((__u32)((nr)/32)) -#define AUDIT_BIT(nr) (1 << ((nr) - AUDIT_WORD(nr)*32)) +#define AUDIT_BIT(nr) (1U << ((nr) - AUDIT_WORD(nr)*32)) #define AUDIT_SYSCALL_CLASSES 16 #define AUDIT_CLASS_DIR_WRITE 0 From 8d39913158ad9e59642672ac03822848582c5767 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 24 Oct 2022 18:28:43 +0200 Subject: [PATCH 0950/1477] wifi: airo: do not assign -1 to unsigned char [ Upstream commit e6cb8769452e8236b52134e5cb4a18b8f5986932 ] With char becoming unsigned by default, and with `char` alone being ambiguous and based on architecture, we get a warning when assigning the unchecked output of hex_to_bin() to that unsigned char. Mark `key` as a `u8`, which matches the struct's type, and then check each call to hex_to_bin() before casting. Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Jason A. 
Donenfeld Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221024162843.535921-1-Jason@zx2c4.com Signed-off-by: Sasha Levin --- drivers/net/wireless/cisco/airo.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index 0569f37e9ed5..8c9c6bfbaeee 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -5236,7 +5236,7 @@ static int get_wep_tx_idx(struct airo_info *ai) return -1; } -static int set_wep_key(struct airo_info *ai, u16 index, const char *key, +static int set_wep_key(struct airo_info *ai, u16 index, const u8 *key, u16 keylen, int perm, int lock) { static const unsigned char macaddr[ETH_ALEN] = { 0x01, 0, 0, 0, 0, 0 }; @@ -5287,7 +5287,7 @@ static void proc_wepkey_on_close(struct inode *inode, struct file *file) struct net_device *dev = PDE_DATA(inode); struct airo_info *ai = dev->ml_priv; int i, rc; - char key[16]; + u8 key[16]; u16 index = 0; int j = 0; @@ -5315,12 +5315,22 @@ static void proc_wepkey_on_close(struct inode *inode, struct file *file) } for (i = 0; i < 16*3 && data->wbuffer[i+j]; i++) { + int val; + + if (i % 3 == 2) + continue; + + val = hex_to_bin(data->wbuffer[i+j]); + if (val < 0) { + airo_print_err(ai->dev->name, "WebKey passed invalid key hex"); + return; + } switch(i%3) { case 0: - key[i/3] = hex_to_bin(data->wbuffer[i+j])<<4; + key[i/3] = (u8)val << 4; break; case 1: - key[i/3] |= hex_to_bin(data->wbuffer[i+j]); + key[i/3] |= (u8)val; break; } } From 2c1ca23555ed63ddca59a06214a96ab2bd417c48 Mon Sep 17 00:00:00 2001 From: Nicolas Cavallari Date: Thu, 27 Oct 2022 16:01:33 +0200 Subject: [PATCH 0951/1477] wifi: mac80211: Fix ack frame idr leak when mesh has no route [ Upstream commit 39e7b5de9853bd92ddbfa4b14165babacd7da0ba ] When trying to transmit an data frame with tx_status to a destination that have no route in the mesh, then it is dropped without recrediting the ack_status_frames idr. 
Once it is exhausted, wpa_supplicant starts failing to do SAE with NL80211_CMD_FRAME and logs "nl80211: Frame command failed". Use ieee80211_free_txskb() instead of kfree_skb() to fix it. Signed-off-by: Nicolas Cavallari Link: https://lore.kernel.org/r/20221027140133.1504-1-nicolas.cavallari@green-communications.fr Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mesh_pathtbl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 870c8eafef92..c2b051e0610a 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -718,7 +718,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - kfree_skb(skb); + ieee80211_free_txskb(&sdata->local->hw, skb); sdata->u.mesh.mshstats.dropped_frames_no_route++; } From 361a1650989855ccda14f63c1b2e0b0f1f5a7ded Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Thu, 3 Nov 2022 09:00:42 +0100 Subject: [PATCH 0952/1477] spi: stm32: fix stm32_spi_prepare_mbr() that halves spi clk for every run [ Upstream commit 62aa1a344b0904549f6de7af958e8a1136fd5228 ] When this driver is used with a driver that uses preallocated spi_transfer structs, the speed_hz is halved on every run. This results in: spi_stm32 44004000.spi: SPI transfer setup failed ads7846 spi0.0: SPI transfer failed: -22 Example when running with DIV_ROUND_UP(): - First run; speed_hz = 1000000, spi->clk_rate 125000000 div 125 -> mbrdiv = 7, cur_speed = 976562 - Second run; speed_hz = 976562 div 128,00007 (roundup to 129) -> mbrdiv = 8, cur_speed = 488281 - Third run; speed_hz = 488281 div 256,000131072067109 (roundup to 257) and then -EINVAL is returned. Use DIV_ROUND_CLOSEST to allow rounding down, so that the set speed is kept.
Signed-off-by: Sean Nyekjaer Link: https://lore.kernel.org/r/20221103080043.3033414-1-sean@geanix.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-stm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 651a6510fb54..9ec37cf10c01 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -443,7 +443,7 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz, u32 div, mbrdiv; /* Ensure spi->clk_rate is even */ - div = DIV_ROUND_UP(spi->clk_rate & ~0x1, speed_hz); + div = DIV_ROUND_CLOSEST(spi->clk_rate & ~0x1, speed_hz); /* * SPI framework set xfer->speed_hz to master->max_speed_hz if From f4b8c0710ab672a163bbc205978dddf2c556e548 Mon Sep 17 00:00:00 2001 From: Youlin Li Date: Thu, 3 Nov 2022 17:34:40 +0800 Subject: [PATCH 0953/1477] selftests/bpf: Add verifier test for release_reference() [ Upstream commit 475244f5e06beeda7b557d9dde46a5f439bf3379 ] Add a test case to ensure that released pointer registers will not be leaked into the map. Before fix: ./test_verifier 984 984/u reference tracking: try to leak released ptr reg FAIL Unexpected success to load! 
verification time 67 usec stack depth 4 processed 23 insns (limit 1000000) max_states_per_insn 0 total_states 2 peak_states 2 mark_read 1 984/p reference tracking: try to leak released ptr reg OK Summary: 1 PASSED, 0 SKIPPED, 1 FAILED After fix: ./test_verifier 984 984/u reference tracking: try to leak released ptr reg OK 984/p reference tracking: try to leak released ptr reg OK Summary: 2 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Youlin Li Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20221103093440.3161-2-liulin063@gmail.com Signed-off-by: Sasha Levin --- .../selftests/bpf/verifier/ref_tracking.c | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 006b5bd99c08..525d810b10b8 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -901,3 +901,39 @@ .result_unpriv = REJECT, .errstr_unpriv = "unknown func", }, +{ + "reference tracking: try to leak released ptr reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_ringbuf_reserve), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_ringbuf_discard), + BPF_MOV64_IMM(BPF_REG_0, 0), + + BPF_STX_MEM(BPF_DW, BPF_REG_9, BPF_REG_8, 0), + BPF_EXIT_INSN() + }, + .fixup_map_array_48b = { 4 }, + .fixup_map_ringbuf = { 11 }, + 
.result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R8 !read_ok" +}, From f54a11b6bf82648577af1b66e5896e80a42caafe Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:12 +0100 Subject: [PATCH 0954/1477] Revert "net: macsec: report real_dev features when HW offloading is enabled" [ Upstream commit 8bcd560ae8784da57c610d857118c5d6576b1a8f ] This reverts commit c850240b6c4132574a00f2da439277ab94265b66. That commit tried to improve the performance of macsec offload by taking advantage of some of the NIC's features, but in doing so, broke macsec offload when the lower device supports both macsec and ipsec offload, as the ipsec offload feature flags (mainly NETIF_F_HW_ESP) were copied from the real device. Since the macsec device doesn't provide xdo_* ops, the XFRM core rejects the registration of the new macsec device in xfrm_api_check. Example perf trace when running ip link add link eni1np1 type macsec port 4 offload mac ip 737 [003] 795.477676: probe:xfrm_dev_event__REGISTER name="macsec0" features=0x1c000080014869 xfrm_dev_event+0x3a notifier_call_chain+0x47 register_netdevice+0x846 macsec_newlink+0x25a ip 737 [003] 795.477687: probe:xfrm_dev_event__return ret=0x8002 (NOTIFY_BAD) notifier_call_chain+0x47 register_netdevice+0x846 macsec_newlink+0x25a dev->features includes NETIF_F_HW_ESP (0x04000000000000), so xfrm_api_check returns NOTIFY_BAD because we don't have dev->xfrmdev_ops on the macsec device. We could probably propagate GSO and a few other features from the lower device, similar to macvlan. This will be done in a future patch. Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index f84e3cc0d3ec..c20ebf44acfe 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2648,11 +2648,6 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) if (ret) goto rollback; - /* Force features update, since they are different for SW MACSec and - * HW offloading cases. - */ - netdev_update_features(dev); - rtnl_unlock(); return 0; @@ -3420,16 +3415,9 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, return ret; } -#define SW_MACSEC_FEATURES \ +#define MACSEC_FEATURES \ (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST) -/* If h/w offloading is enabled, use real device features save for - * VLAN_FEATURES - they require additional ops - * HW_MACSEC - no reason to report it - */ -#define REAL_DEV_FEATURES(dev) \ - ((dev)->features & ~(NETIF_F_VLAN_FEATURES | NETIF_F_HW_MACSEC)) - static int macsec_dev_init(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); @@ -3446,12 +3434,8 @@ static int macsec_dev_init(struct net_device *dev) return err; } - if (macsec_is_offloaded(macsec)) { - dev->features = REAL_DEV_FEATURES(real_dev); - } else { - dev->features = real_dev->features & SW_MACSEC_FEATURES; - dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE; - } + dev->features = real_dev->features & MACSEC_FEATURES; + dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE; dev->needed_headroom = real_dev->needed_headroom + MACSEC_NEEDED_HEADROOM; @@ -3480,10 +3464,7 @@ static netdev_features_t macsec_fix_features(struct net_device *dev, struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; - if (macsec_is_offloaded(macsec)) - return REAL_DEV_FEATURES(real_dev); - - features &= (real_dev->features & SW_MACSEC_FEATURES) | + features &= (real_dev->features & MACSEC_FEATURES) | 
NETIF_F_GSO_SOFTWARE | NETIF_F_SOFT_FEATURES; features |= NETIF_F_LLTX; From 7fc62181c1d433b46409e97b2a3c0c954c5bfbad Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 25 Oct 2022 16:11:31 +0200 Subject: [PATCH 0955/1477] platform/x86: touchscreen_dmi: Add info for the RCA Cambio W101 v2 2-in-1 [ Upstream commit 0df044b34bf33e7e35c32b3bf6747fde6279c162 ] Add touchscreen info for the RCA Cambio W101 v2 2-in-1. Link: https://github.com/onitake/gsl-firmware/discussions/193 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221025141131.509211-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/touchscreen_dmi.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index ab6a9369649d..110ff1e6ef81 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -756,6 +756,22 @@ static const struct ts_dmi_data predia_basic_data = { .properties = predia_basic_props, }; +static const struct property_entry rca_cambio_w101_v2_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 4), + PROPERTY_ENTRY_U32("touchscreen-min-y", 20), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1644), + PROPERTY_ENTRY_U32("touchscreen-size-y", 874), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rca-cambio-w101-v2.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data rca_cambio_w101_v2_data = { + .acpi_name = "MSSL1680:00", + .properties = rca_cambio_w101_v2_props, +}; + static const struct property_entry rwc_nanote_p8_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-y", 46), PROPERTY_ENTRY_U32("touchscreen-size-x", 1728), @@ -1341,6 +1357,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "0E57"), }, }, + { + /* RCA Cambio W101 v2 */ + /* 
https://github.com/onitake/gsl-firmware/discussions/193 */ + .driver_data = (void *)&rca_cambio_w101_v2_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "RCA"), + DMI_MATCH(DMI_PRODUCT_NAME, "W101SA23T1"), + }, + }, { /* RWC NANOTE P8 */ .driver_data = (void *)&rwc_nanote_p8_data, From 2574903ee260df5281ffc8edea10f8278548abf7 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 26 Oct 2022 13:13:56 -0500 Subject: [PATCH 0956/1477] scsi: ibmvfc: Avoid path failures during live migration [ Upstream commit 62fa3ce05d5d73c5eccc40b2db493f55fecfc446 ] Fix an issue reported when performing a live migration when multipath is configured with a short fast fail timeout of 5 seconds and also to have no_path_retry set to fail. In this scenario, all paths would go into the devloss state while the ibmvfc driver went through discovery to log back in. On a loaded system, the discovery might take longer than 5 seconds, which was resulting in all paths being marked failed, which then resulted in a read only filesystem. This patch changes the migration code in ibmvfc to avoid deleting rports at all in this scenario, so we avoid losing all paths. Signed-off-by: Brian King Link: https://lore.kernel.org/r/20221026181356.148517-1-brking@linux.vnet.ibm.com Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ibmvscsi/ibmvfc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index f6d6539c657f..b793e342ab7c 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -635,8 +635,13 @@ static void ibmvfc_init_host(struct ibmvfc_host *vhost) memset(vhost->async_crq.msgs, 0, PAGE_SIZE); vhost->async_crq.cur = 0; - list_for_each_entry(tgt, &vhost->targets, queue) - ibmvfc_del_tgt(tgt); + list_for_each_entry(tgt, &vhost->targets, queue) { + if (vhost->client_migrated) + tgt->need_login = 1; + else + ibmvfc_del_tgt(tgt); + } + scsi_block_requests(vhost->host); ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT); vhost->job_step = ibmvfc_npiv_login; @@ -2822,9 +2827,12 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost) /* We need to re-setup the interpartition connection */ dev_info(vhost->dev, "Partition migrated, Re-enabling adapter\n"); vhost->client_migrated = 1; + + scsi_block_requests(vhost->host); ibmvfc_purge_requests(vhost, DID_REQUEUE); - ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN); + ibmvfc_set_host_state(vhost, IBMVFC_LINK_DOWN); ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_REENABLE); + wake_up(&vhost->work_wait_q); } else if (crq->format == IBMVFC_PARTNER_FAILED || crq->format == IBMVFC_PARTNER_DEREGISTER) { dev_err(vhost->dev, "Host partner adapter deregistered or failed (rc=%d)\n", crq->format); ibmvfc_purge_requests(vhost, DID_ERROR); From f7ce6fb04e0479528ca0116924f9aaa8ce4f7840 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 2 Nov 2022 12:32:48 -0700 Subject: [PATCH 0957/1477] scsi: scsi_debug: Make the READ CAPACITY response compliant with ZBC [ Upstream commit ecb8c2580d37dbb641451049376d80c8afaa387f ] From ZBC-1: - RC BASIS = 0: The RETURNED LOGICAL BLOCK ADDRESS field indicates the highest LBA of a contiguous range of zones that are not 
sequential write required zones starting with the first zone. - RC BASIS = 1: The RETURNED LOGICAL BLOCK ADDRESS field indicates the LBA of the last logical block on the logical unit. The current scsi_debug READ CAPACITY response does not comply with the above if there are one or more sequential write required zones. SCSI initiators need a way to retrieve the largest valid LBA from SCSI devices. Reporting the largest valid LBA if there are one or more sequential zones requires to set the RC BASIS field in the READ CAPACITY response to one. Hence this patch. Cc: Douglas Gilbert Cc: Damien Le Moal Suggested-by: Damien Le Moal Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221102193248.3177608-1-bvanassche@acm.org Reviewed-by: Damien Le Moal Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_debug.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 261b915835b4..cc20621bb49d 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1878,6 +1878,13 @@ static int resp_readcap16(struct scsi_cmnd *scp, arr[14] |= 0x40; } + /* + * Since the scsi_debug READ CAPACITY implementation always reports the + * total disk capacity, set RC BASIS = 1 for host-managed ZBC devices. + */ + if (devip->zmodel == BLK_ZONED_HM) + arr[12] |= 1 << 4; + arr[15] = sdebug_lowest_aligned & 0xff; if (have_dif_prot) { From d29bde868945ba04f0d01ea7cb542fdbdde59fd4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 6 Nov 2022 22:50:52 +0100 Subject: [PATCH 0958/1477] drm: panel-orientation-quirks: Add quirk for Acer Switch V 10 (SW5-017) [ Upstream commit 653f2d94fcda200b02bd79cea2e0307b26c1b747 ] Like the Acer Switch One 10 S1003, for which there already is a quirk, the Acer Switch V 10 (SW5-017) has a 800x1280 portrait screen mounted in the tablet part of a landscape oriented 2-in-1. Add a quirk for this. 
Cc: Rudolf Polzer Signed-off-by: Hans de Goede Acked-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20221106215052.66995-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 083273736c83..ca0fefeaab20 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -128,6 +128,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "One S1003"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Acer Switch V 10 (SW5-017) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SW5-017"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* Anbernic Win600 */ .matches = { DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Anbernic"), From fa5f2c72d39f530740b634b4574df78fba919930 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 8 Nov 2022 18:34:34 +0800 Subject: [PATCH 0959/1477] block, bfq: fix null pointer dereference in bfq_bio_bfqg() [ Upstream commit f02be9002c480cd3ec0fcf184ad27cf531bd6ece ] Out test found a following problem in kernel 5.10, and the same problem should exist in mainline: BUG: kernel NULL pointer dereference, address: 0000000000000094 PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 7 PID: 155 Comm: kworker/7:1 Not tainted 5.10.0-01932-g19e0ace2ca1d-dirty 4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-b4 Workqueue: kthrotld blk_throtl_dispatch_work_fn RIP: 0010:bfq_bio_bfqg+0x52/0xc0 Code: 94 00 00 00 00 75 2e 48 8b 40 30 48 83 05 35 06 c8 0b 01 48 85 c0 74 3d 4b RSP: 0018:ffffc90001a1fba0 EFLAGS: 00010002 RAX: ffff888100d60400 RBX: ffff8881132e7000 RCX: 0000000000000000 RDX: 0000000000000017 RSI: ffff888103580a18 RDI: ffff888103580a18 RBP: ffff8881132e7000 R08: 
0000000000000000 R09: ffffc90001a1fe10 R10: 0000000000000a20 R11: 0000000000034320 R12: 0000000000000000 R13: ffff888103580a18 R14: ffff888114447000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88881fdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000094 CR3: 0000000100cdb000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bfq_bic_update_cgroup+0x3c/0x350 ? ioc_create_icq+0x42/0x270 bfq_init_rq+0xfd/0x1060 bfq_insert_requests+0x20f/0x1cc0 ? ioc_create_icq+0x122/0x270 blk_mq_sched_insert_requests+0x86/0x1d0 blk_mq_flush_plug_list+0x193/0x2a0 blk_flush_plug_list+0x127/0x170 blk_finish_plug+0x31/0x50 blk_throtl_dispatch_work_fn+0x151/0x190 process_one_work+0x27c/0x5f0 worker_thread+0x28b/0x6b0 ? rescuer_thread+0x590/0x590 kthread+0x153/0x1b0 ? kthread_flush_work+0x170/0x170 ret_from_fork+0x1f/0x30 Modules linked in: CR2: 0000000000000094 ---[ end trace e2e59ac014314547 ]--- RIP: 0010:bfq_bio_bfqg+0x52/0xc0 Code: 94 00 00 00 00 75 2e 48 8b 40 30 48 83 05 35 06 c8 0b 01 48 85 c0 74 3d 4b RSP: 0018:ffffc90001a1fba0 EFLAGS: 00010002 RAX: ffff888100d60400 RBX: ffff8881132e7000 RCX: 0000000000000000 RDX: 0000000000000017 RSI: ffff888103580a18 RDI: ffff888103580a18 RBP: ffff8881132e7000 R08: 0000000000000000 R09: ffffc90001a1fe10 R10: 0000000000000a20 R11: 0000000000034320 R12: 0000000000000000 R13: ffff888103580a18 R14: ffff888114447000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88881fdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000094 CR3: 0000000100cdb000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Root cause is quite complex: 1) use bfq elevator for the test device. 
2) create a cgroup CG 3) config blk throtl in CG blkg_conf_prep blkg_create 4) create a thread T1 and issue async io in CG: bio_init bio_associate_blkg ... submit_bio submit_bio_noacct blk_throtl_bio -> io is throttled // io submit is done 5) switch elevator: bfq_exit_queue blkcg_deactivate_policy list_for_each_entry(blkg, &q->blkg_list, q_node) blkg->pd[] = NULL // bfq policy is removed 6) thread T1 exits, then remove the cgroup CG: blkcg_unpin_online blkcg_destroy_blkgs blkg_destroy list_del_init(&blkg->q_node) // blkg is removed from queue list 7) switch elevator back to bfq bfq_init_queue bfq_create_group_hierarchy blkcg_activate_policy list_for_each_entry_reverse(blkg, &q->blkg_list) // blkg is removed from list, hence bfq policy is still NULL 8) throttled io is dispatched to bfq: bfq_insert_requests bfq_init_rq bfq_bic_update_cgroup bfq_bio_bfqg bfqg = blkg_to_bfqg(blkg) // bfqg is NULL because bfq policy is NULL The problem is only possible in bfq because only bfq can be deactivated and activated while queue is online, while others can only be deactivated while the device is removed. Fix the problem in bfq by checking if blkg is online before calling blkg_to_bfqg().
Signed-off-by: Yu Kuai Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221108103434.2853269-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-cgroup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index be6733558b83..badb90352bf3 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -611,6 +611,10 @@ struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) struct bfq_group *bfqg; while (blkg) { + if (!blkg->online) { + blkg = blkg->parent; + continue; + } bfqg = blkg_to_bfqg(blkg); if (bfqg->online) { bio_associate_blkg_from_css(bio, &blkg->blkcg->css); From 81cc6d8400acae90bba53ed8fb556766ff60253f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 31 Oct 2022 14:57:28 -0700 Subject: [PATCH 0960/1477] arm64/syscall: Include asm/ptrace.h in syscall_wrapper header. [ Upstream commit acfc35cfcee5df419391671ef1a631f43feee4e3 ] Add the same change for ARM64 as done in the commit 9440c4294160 ("x86/syscall: Include asm/ptrace.h in syscall_wrapper header") to make sure all syscalls see 'struct pt_regs' definition and resulted BTF for '__arm64_sys_*(struct pt_regs *regs)' functions point to actual struct. Without this patch, the BPF verifier refuses to load a tracing prog which accesses pt_regs. bpf(BPF_PROG_LOAD, {prog_type=0x1a, ...}, 128) = -1 EACCES With this patch, we can see the correct error, which saves us time in debugging the prog. 
bpf(BPF_PROG_LOAD, {prog_type=0x1a, ...}, 128) = 4 bpf(BPF_RAW_TRACEPOINT_OPEN, {raw_tracepoint={name=NULL, prog_fd=4}}, 128) = -1 ENOTSUPP Signed-off-by: Kuniyuki Iwashima Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221031215728.50389-1-kuniyu@amazon.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/include/asm/syscall_wrapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index b383b4802a7b..d30217c21eff 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -8,7 +8,7 @@ #ifndef __ASM_SYSCALL_WRAPPER_H #define __ASM_SYSCALL_WRAPPER_H -struct pt_regs; +#include <asm/ptrace.h> #define SC_ARM64_REGS_TO_ARGS(x, ...) \ __MAP(x,__SC_ARGS \ From c0bb600f076832764b43ae4ef6ee003d9a71f7f9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 8 Nov 2022 10:13:23 -0700 Subject: [PATCH 0961/1477] RISC-V: vdso: Do not add missing symbols to version section in linker script [ Upstream commit fcae44fd36d052e956e69a64642fc03820968d78 ] Recently, ld.lld moved from '--undefined-version' to '--no-undefined-version' as the default, which breaks the compat vDSO build: ld.lld: error: version script assignment of 'LINUX_4.15' to symbol '__vdso_gettimeofday' failed: symbol not defined ld.lld: error: version script assignment of 'LINUX_4.15' to symbol '__vdso_clock_gettime' failed: symbol not defined ld.lld: error: version script assignment of 'LINUX_4.15' to symbol '__vdso_clock_getres' failed: symbol not defined These symbols are not present in the compat vDSO or the regular vDSO for 32-bit but they are unconditionally included in the version section of the linker script, which is prohibited with '--no-undefined-version'. Fix this issue by only including the symbols that are actually exported in the version section of the linker script.
Link: https://github.com/ClangBuiltLinux/linux/issues/1756 Signed-off-by: Nathan Chancellor Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20221108171324.3377226-1-nathan@kernel.org/ Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/vdso/Makefile | 3 +++ arch/riscv/kernel/vdso/vdso.lds.S | 2 ++ 2 files changed, 5 insertions(+) diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 926ab3960f9e..c92b55a0ec1c 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -28,6 +28,9 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) obj-y += vdso.o vdso-syms.o CPPFLAGS_vdso.lds += -P -C -U$(ARCH) +ifneq ($(filter vgettimeofday, $(vdso-syms)),) +CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY +endif # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index e6f558bca71b..b3e58402c342 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -64,9 +64,11 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; +#ifdef HAS_VGETTIMEOFDAY __vdso_gettimeofday; __vdso_clock_gettime; __vdso_clock_getres; +#endif __vdso_getcpu; __vdso_flush_icache; local: *; From 836bbdfcf8ef4b9b8a10c1f6fee3cff2b72bedc7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 28 Oct 2022 15:23:44 +0200 Subject: [PATCH 0962/1477] MIPS: pic32: treat port as signed integer [ Upstream commit 648060902aa302331b5d6e4f26d8ee0761d239ab ] get_port_from_cmdline() returns an int, yet is assigned to a char, which is wrong in its own right, but also, with char becoming unsigned, this poses problems, because -1 is used as an error value. Further complicating things, fw_init_early_console() is only ever called with a -1 argument. Fix this up by removing the unused argument from fw_init_early_console() and treating port as a proper signed integer. 
Cc: Thomas Bogendoerfer Signed-off-by: Jason A. Donenfeld Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/include/asm/fw/fw.h | 2 +- arch/mips/pic32/pic32mzda/early_console.c | 13 ++++++------- arch/mips/pic32/pic32mzda/init.c | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/mips/include/asm/fw/fw.h b/arch/mips/include/asm/fw/fw.h index d0ef8b4892bb..d0494ce4b337 100644 --- a/arch/mips/include/asm/fw/fw.h +++ b/arch/mips/include/asm/fw/fw.h @@ -26,6 +26,6 @@ extern char *fw_getcmdline(void); extern void fw_meminit(void); extern char *fw_getenv(char *name); extern unsigned long fw_getenvl(char *name); -extern void fw_init_early_console(char port); +extern void fw_init_early_console(void); #endif /* __ASM_FW_H_ */ diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c index 25372e62783b..3cd1b408fa1c 100644 --- a/arch/mips/pic32/pic32mzda/early_console.c +++ b/arch/mips/pic32/pic32mzda/early_console.c @@ -27,7 +27,7 @@ #define U_BRG(x) (UART_BASE(x) + 0x40) static void __iomem *uart_base; -static char console_port = -1; +static int console_port = -1; static int __init configure_uart_pins(int port) { @@ -47,7 +47,7 @@ static int __init configure_uart_pins(int port) return 0; } -static void __init configure_uart(char port, int baud) +static void __init configure_uart(int port, int baud) { u32 pbclk; @@ -60,7 +60,7 @@ static void __init configure_uart(char port, int baud) uart_base + PIC32_SET(U_STA(port))); } -static void __init setup_early_console(char port, int baud) +static void __init setup_early_console(int port, int baud) { if (configure_uart_pins(port)) return; @@ -130,16 +130,15 @@ _out: return baud; } -void __init fw_init_early_console(char port) +void __init fw_init_early_console(void) { char *arch_cmdline = pic32_getcmdline(); - int baud = -1; + int baud, port; uart_base = ioremap(PIC32_BASE_UART, 0xc00); baud = get_baud_from_cmdline(arch_cmdline); - if (port == 
-1) - port = get_port_from_cmdline(arch_cmdline); + port = get_port_from_cmdline(arch_cmdline); if (port == -1) port = EARLY_CONSOLE_PORT; diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index f232c77ff526..488c0bee7ebf 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -60,7 +60,7 @@ void __init plat_mem_setup(void) strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE); #ifdef CONFIG_EARLY_PRINTK - fw_init_early_console(-1); + fw_init_early_console(); #endif pic32_config_init(); } From 497653f6d2392c8d4abb041f01e1a19781d995d0 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sun, 9 Oct 2022 22:16:43 +0300 Subject: [PATCH 0963/1477] xfrm: fix "disable_policy" on ipv4 early demux [ Upstream commit 3a5913183aa1b14148c723bda030e6102ad73008 ] The commit in the "Fixes" tag tried to avoid a case where policy check is ignored due to dst caching in next hops. However, when the traffic is locally consumed, the dst may be cached in a local TCP or UDP socket as part of early demux. In this case the "disable_policy" flag is not checked as ip_route_input_noref() was only called before caching, and thus, packets after the initial packet in a flow will be dropped if not matching policies. Fix by checking the "disable_policy" flag also when a valid dst is already available. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216557 Reported-by: Monil Patel Fixes: e6175a2ed1f1 ("xfrm: fix "disable_policy" flag use when arriving from different devices") Signed-off-by: Eyal Birger ---- v2: use dev instead of skb->dev Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv4/ip_input.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index f6b3237e88ca..eccd7897e7aa 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -361,6 +361,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, iph->tos, dev); if (unlikely(err)) goto drop_error; + } else { + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY)) + IPCB(skb)->flags |= IPSKB_NOPOLICY; } #ifdef CONFIG_IP_ROUTE_CLASSID From 51969d679ba4567e10f6c15061d1dad1063406e7 Mon Sep 17 00:00:00 2001 From: Christian Langrock Date: Mon, 17 Oct 2022 08:34:47 +0200 Subject: [PATCH 0964/1477] xfrm: replay: Fix ESN wrap around for GSO [ Upstream commit 4b549ccce941798703f159b227aa28c716aa78fa ] When using GSO it can happen that the wrong seq_hi is used for the last packets before the wrap around. This can lead to double usage of a sequence number. To avoid this, we should serialize this last GSO packet. 
Fixes: d7dbefc45cf5 ("xfrm: Add xfrm_replay_overflow functions for offloading") Co-developed-by: Steffen Klassert Signed-off-by: Christian Langrock Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv4/esp4_offload.c | 3 +++ net/ipv6/esp6_offload.c | 3 +++ net/xfrm/xfrm_device.c | 15 ++++++++++++++- net/xfrm/xfrm_replay.c | 2 +- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 3450c9ba2728..84257678160a 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -312,6 +312,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ xo->seq.low += skb_shinfo(skb)->gso_segs; } + if (xo->seq.low < seq) + xo->seq.hi++; + esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); ip_hdr(skb)->tot_len = htons(skb->len); diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 1c3f02d05d2b..7608be04d0f5 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -343,6 +343,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features xo->seq.low += skb_shinfo(skb)->gso_segs; } + if (xo->seq.low < seq) + xo->seq.hi++; + esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); len = skb->len - sizeof(struct ipv6hdr); diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index c255aac6b816..8b8e957a69c3 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -97,6 +97,18 @@ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) } } +static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + __u32 seq = xo->seq.low; + + seq += skb_shinfo(skb)->gso_segs; + if (unlikely(seq < xo->seq.low)) + return true; + + return false; +} + struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; @@ -134,7 +146,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, 
netdev_features_t featur return skb; } - if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) { + if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || + unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index c6a4338a0d08..65d009e3b6bb 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -657,7 +657,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff oseq += skb_shinfo(skb)->gso_segs; } - if (unlikely(oseq < replay_esn->oseq)) { + if (unlikely(xo->seq.low < replay_esn->oseq)) { XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; xo->seq.hi = oseq_hi; replay_esn->oseq_hi = oseq_hi; From b7000254c12537ea63d9304ac8b2f5379fdc258e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 25 Oct 2022 14:06:48 +0800 Subject: [PATCH 0965/1477] af_key: Fix send_acquire race with pfkey_register [ Upstream commit 7f57f8165cb6d2c206e2b9ada53b9e2d6d8af42f ] The function pfkey_send_acquire may race with pfkey_register (which could even be in a different name space). This may result in a buffer overrun. Allocating the maximum amount of memory that could be used prevents this. 
Reported-by: syzbot+1e9af9185d8850e2c2fa@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Herbert Xu Reviewed-by: Sabrina Dubroca Reviewed-by: Eric Dumazet Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/key/af_key.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 05e271098888..8bc7d399987b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2909,7 +2909,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2927,7 +2927,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg) && ealg->available)) + if (!(ealg_tmpl_set(t, ealg))) continue; for (k = 1; ; k++) { @@ -2938,16 +2938,17 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } return sz + sizeof(struct sadb_prop); } -static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i; p = skb_put(skb, sizeof(struct sadb_prop)); @@ -2975,13 +2976,17 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + return sz + sizeof(*p); } -static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i, k; p = skb_put(skb, sizeof(struct 
sadb_prop)); @@ -3023,8 +3028,11 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + return sz + sizeof(*p); } static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) @@ -3154,6 +3162,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; + int alg_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) @@ -3165,16 +3174,16 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct sizeof(struct sadb_x_policy); if (x->id.proto == IPPROTO_AH) - size += count_ah_combs(t); + alg_size = count_ah_combs(t); else if (x->id.proto == IPPROTO_ESP) - size += count_esp_combs(t); + alg_size = count_esp_combs(t); if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; } - skb = alloc_skb(size + 16, GFP_ATOMIC); + skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -3228,10 +3237,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. 
*/ + alg_size = 0; if (x->id.proto == IPPROTO_AH) - dump_ah_combs(skb, t); + alg_size = dump_ah_combs(skb, t); else if (x->id.proto == IPPROTO_ESP) - dump_esp_combs(skb, t); + alg_size = dump_esp_combs(skb, t); + + hdr->sadb_msg_len += alg_size / 8; /* security context */ if (xfrm_ctx) { From 7cfb4b8579d3cbaa69a67dc1fc501b58d0c5f49b Mon Sep 17 00:00:00 2001 From: Dominik Haller Date: Tue, 11 Oct 2022 16:31:15 +0200 Subject: [PATCH 0966/1477] ARM: dts: am335x-pcm-953: Define fixed regulators in root node [ Upstream commit 8950f345a67d8046d2472dd6ea81fa18ef5b4844 ] Remove the regulators node and define fixed regulators in the root node. Prevents the sdhci-omap driver from waiting in probe deferral forever because of the missing vmmc-supply and keeps am335x-pcm-953 consistent with the other Phytec AM335 boards. Fixes: bb07a829ec38 ("ARM: dts: Add support for phyCORE-AM335x PCM-953 carrier board") Signed-off-by: Dominik Haller Message-Id: <20221011143115.248003-1-d.haller@phytec.de> Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/am335x-pcm-953.dtsi | 28 +++++++++++++-------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi index 6c547c83e5dd..fc465f0d7e18 100644 --- a/arch/arm/boot/dts/am335x-pcm-953.dtsi +++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi @@ -12,22 +12,20 @@ compatible = "phytec,am335x-pcm-953", "phytec,am335x-phycore-som", "ti,am33xx"; /* Power */ - regulators { - vcc3v3: fixedregulator@1 { - compatible = "regulator-fixed"; - regulator-name = "vcc3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-boot-on; - }; + vcc3v3: fixedregulator1 { + compatible = "regulator-fixed"; + regulator-name = "vcc3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + }; - vcc1v8: fixedregulator@2 { - compatible = "regulator-fixed"; - regulator-name = 
"vcc1v8"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-boot-on; - }; + vcc1v8: fixedregulator2 { + compatible = "regulator-fixed"; + regulator-name = "vcc1v8"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-boot-on; }; /* User IO */ From 164a5b50d10488d4b442453aaf71a78fa36e6fb4 Mon Sep 17 00:00:00 2001 From: Junxiao Chang Date: Thu, 10 Nov 2022 07:40:23 +0800 Subject: [PATCH 0967/1477] ASoC: hdac_hda: fix hda pcm buffer overflow issue [ Upstream commit 37882100cd0629d830db430a8cee0b724fe1fea3 ] When KASAN is enabled, below log might be dumped with Intel EHL hardware: [ 48.583597] ================================================================== [ 48.585921] BUG: KASAN: slab-out-of-bounds in hdac_hda_dai_hw_params+0x20a/0x22b [snd_soc_hdac_hda] [ 48.587995] Write of size 4 at addr ffff888103489708 by task pulseaudio/759 [ 48.589237] CPU: 2 PID: 759 Comm: pulseaudio Tainted: G U E 5.15.71-intel-ese-standard-lts #9 [ 48.591272] Hardware name: Intel Corporation Elkhart Lake Embedded Platform/ElkhartLake LPDDR4x T3 CRB, BIOS EHLSFWI1.R00.4251.A01.2206130432 06/13/2022 [ 48.593010] Call Trace: [ 48.593648] [ 48.593852] dump_stack_lvl+0x34/0x48 [ 48.594404] print_address_description.constprop.0+0x1f/0x140 [ 48.595174] ? hdac_hda_dai_hw_params+0x20a/0x22b [snd_soc_hdac_hda] [ 48.595868] ? hdac_hda_dai_hw_params+0x20a/0x22b [snd_soc_hdac_hda] [ 48.596519] kasan_report.cold+0x7f/0x11b [ 48.597003] ? hdac_hda_dai_hw_params+0x20a/0x22b [snd_soc_hdac_hda] [ 48.597885] hdac_hda_dai_hw_params+0x20a/0x22b [snd_soc_hdac_hda] HDAC_LAST_DAI_ID is last index id, pcm buffer array size should be +1 to avoid out of bound access. 
Fixes: 608b8c36c371 ("ASoC: hdac_hda: add support for HDMI/DP as a HDA codec") Reviewed-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Signed-off-by: Junxiao Chang Signed-off-by: Furong Zhou Link: https://lore.kernel.org/r/20221109234023.3111035-1-junxiao.chang@intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/hdac_hda.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/hdac_hda.h b/sound/soc/codecs/hdac_hda.h index d0efc5e254ae..da0ed74758b0 100644 --- a/sound/soc/codecs/hdac_hda.h +++ b/sound/soc/codecs/hdac_hda.h @@ -14,7 +14,7 @@ enum { HDAC_HDMI_1_DAI_ID, HDAC_HDMI_2_DAI_ID, HDAC_HDMI_3_DAI_ID, - HDAC_LAST_DAI_ID = HDAC_HDMI_3_DAI_ID, + HDAC_DAI_ID_NUM }; struct hdac_hda_pcm { @@ -24,7 +24,7 @@ struct hdac_hda_pcm { struct hdac_hda_priv { struct hda_codec codec; - struct hdac_hda_pcm pcm[HDAC_LAST_DAI_ID]; + struct hdac_hda_pcm pcm[HDAC_DAI_ID_NUM]; bool need_display_power; }; From 4f6c7344ab261999847baa78688f55a45875e7b9 Mon Sep 17 00:00:00 2001 From: Detlev Casanova Date: Thu, 10 Nov 2022 14:06:12 -0500 Subject: [PATCH 0968/1477] ASoC: sgtl5000: Reset the CHIP_CLK_CTRL reg on remove [ Upstream commit 0bb8e9b36b5b7f2e77892981ff6c27ee831d8026 ] Since commit bf2aebccddef ("ASoC: sgtl5000: Fix noise on shutdown/remove"), the device power control registers are reset when the driver is removed/shutdown. This is an issue when the device is configured to use the PLL clock. The device will stop responding if it is still configured to use the PLL clock but the PLL clock is powered down. When rebooting linux, the probe function will show: sgtl5000 0-000a: Error reading chip id -11 Make sure that the CHIP_CLK_CTRL is reset to its default value before powering down the device. 
Fixes: bf2aebccddef ("ASoC: sgtl5000: Fix noise on shutdown/remove") Signed-off-by: Detlev Casanova Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20221110190612.1341469-1-detlev.casanova@collabora.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/sgtl5000.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index f066e016a874..edde0323799a 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1797,6 +1797,7 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) { struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_CLK_CTRL, SGTL5000_CHIP_CLK_CTRL_DEFAULT); regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT); regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT); From c34db0d6b88b1da95e7ab3353e674f4f574cccee Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 4 Nov 2022 13:22:13 +0000 Subject: [PATCH 0969/1477] ASoC: soc-pcm: Don't zero TDM masks in __soc_pcm_open() [ Upstream commit 39bd801d6908900e9ab0cdc2655150f95ddd4f1a ] The DAI tx_mask and rx_mask are set by snd_soc_dai_set_tdm_slot() and used by later code that depends on the TDM settings. So __soc_pcm_open() should not be obliterating those mask values. The code in __soc_pcm_hw_params() uses these masks to calculate the active channels so that only the AIF_IN/AIF_OUT widgets for the active TDM slots are enabled. The zeroing of the masks in __soc_pcm_open() disables this functionality so all AIF widgets were enabled even for channels that are not assigned to a TDM slot. 
Signed-off-by: Richard Fitzgerald Fixes: 2e5894d73789 ("ASoC: pcm: Add support for DAI multicodec") Link: https://lore.kernel.org/r/20221104132213.121847-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-pcm.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 8b8a9aca2912..0e2261ee07b6 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -723,11 +723,6 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) ret = snd_soc_dai_startup(dai, substream); if (ret < 0) goto err; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - dai->tx_mask = 0; - else - dai->rx_mask = 0; } /* Dynamic PCM DAI links compat checks use dynamic capabilities */ From 26d3d3ffa82b15431e0423dd3464ca56ebf9a0f4 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 9 Nov 2022 10:48:42 -0800 Subject: [PATCH 0970/1477] scsi: storvsc: Fix handling of srb_status and capacity change events [ Upstream commit b8a5376c321b4669f7ffabc708fd30c3970f3084 ] Current handling of the srb_status is incorrect. Commit 52e1b3b3daa9 ("scsi: storvsc: Correctly handle multiple flags in srb_status") is based on srb_status being a set of flags, when in fact only the 2 high order bits are flags and the remaining 6 bits are an integer status. Because the integer values of interest mostly look like flags, the code actually works when treated that way. But in the interest of correctness going forward, fix this by treating the low 6 bits of srb_status as an integer status code. Add handling for SRB_STATUS_INVALID_REQUEST, which was the original intent of commit 52e1b3b3daa9. Furthermore, treat the ERROR, ABORTED, and INVALID_REQUEST srb status codes as essentially equivalent for the cases we care about. There's no harm in doing so, and it isn't always clear which status code current or older versions of Hyper-V report for particular conditions. 
Treating the srb status codes as equivalent has the additional benefit of ensuring that capacity change events result in an immediate rescan so that the new size is known to Linux. Existing code checks SCSI sense data for capacity change events when the srb status is ABORTED. But capacity change events are also being observed when Hyper-V reports the srb status as ERROR. Without the immediate rescan, the new size isn't known until something else causes a rescan (such as running fdisk to expand a partition), and in the meantime, tools such as "lsblk" continue to report the old size. Fixes: 52e1b3b3daa9 ("scsi: storvsc: Correctly handle multiple flags in srb_status") Reported-by: Juan Tian Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1668019722-1983-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu Signed-off-by: Sasha Levin --- drivers/scsi/storvsc_drv.c | 69 +++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 7ac1090d4379..3fa8a0c94bdc 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -356,16 +356,21 @@ enum storvsc_request_type { }; /* - * SRB status codes and masks; a subset of the codes used here. + * SRB status codes and masks. In the 8-bit field, the two high order bits + * are flags, while the remaining 6 bits are an integer status code. The + * definitions here include only the subset of the integer status codes that + * are tested for in this driver. 
*/ - #define SRB_STATUS_AUTOSENSE_VALID 0x80 #define SRB_STATUS_QUEUE_FROZEN 0x40 -#define SRB_STATUS_INVALID_LUN 0x20 -#define SRB_STATUS_SUCCESS 0x01 -#define SRB_STATUS_ABORTED 0x02 -#define SRB_STATUS_ERROR 0x04 -#define SRB_STATUS_DATA_OVERRUN 0x12 + +/* SRB status integer codes */ +#define SRB_STATUS_SUCCESS 0x01 +#define SRB_STATUS_ABORTED 0x02 +#define SRB_STATUS_ERROR 0x04 +#define SRB_STATUS_INVALID_REQUEST 0x06 +#define SRB_STATUS_DATA_OVERRUN 0x12 +#define SRB_STATUS_INVALID_LUN 0x20 #define SRB_STATUS(status) \ (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) @@ -995,38 +1000,25 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, void (*process_err_fn)(struct work_struct *work); struct hv_host_device *host_dev = shost_priv(host); - /* - * In some situations, Hyper-V sets multiple bits in the - * srb_status, such as ABORTED and ERROR. So process them - * individually, with the most specific bits first. - */ + switch (SRB_STATUS(vm_srb->srb_status)) { + case SRB_STATUS_ERROR: + case SRB_STATUS_ABORTED: + case SRB_STATUS_INVALID_REQUEST: + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) { + /* Check for capacity change */ + if ((asc == 0x2a) && (ascq == 0x9)) { + process_err_fn = storvsc_device_scan; + /* Retry the I/O that triggered this. */ + set_host_byte(scmnd, DID_REQUEUE); + goto do_work; + } - if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) { - set_host_byte(scmnd, DID_NO_CONNECT); - process_err_fn = storvsc_remove_lun; - goto do_work; - } - - if (vm_srb->srb_status & SRB_STATUS_ABORTED) { - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID && - /* Capacity data has changed */ - (asc == 0x2a) && (ascq == 0x9)) { - process_err_fn = storvsc_device_scan; /* - * Retry the I/O that triggered this. 
+ * Otherwise, let upper layer deal with the + * error when sense message is present */ - set_host_byte(scmnd, DID_REQUEUE); - goto do_work; - } - } - - if (vm_srb->srb_status & SRB_STATUS_ERROR) { - /* - * Let upper layer deal with error when - * sense message is present. - */ - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) return; + } /* * If there is an error; offline the device since all @@ -1049,6 +1041,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, default: set_host_byte(scmnd, DID_ERROR); } + return; + + case SRB_STATUS_INVALID_LUN: + set_host_byte(scmnd, DID_NO_CONNECT); + process_err_fn = storvsc_remove_lun; + goto do_work; + } return; From 0b6441abfa5d3c193ba8b75c220aa61ba5a0040f Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Wed, 16 Nov 2022 15:43:39 +0800 Subject: [PATCH 0971/1477] regulator: core: fix kobject release warning and memory leak in regulator_register() [ Upstream commit 5f4b204b6b8153923d5be8002c5f7082985d153f ] Here is a warning report about lack of registered release() from kobject lib: Device '(null)' does not have a release() function, it is broken and must be fixed. WARNING: CPU: 0 PID: 48430 at drivers/base/core.c:2332 device_release+0x104/0x120 Call Trace: kobject_put+0xdc/0x180 put_device+0x1b/0x30 regulator_register+0x651/0x1170 devm_regulator_register+0x4f/0xb0 When regulator_register() returns fail and directly goto `clean` symbol, rdev->dev has not registered release() function yet (which is registered by regulator_class in the following), so rdev needs to be freed manually. If rdev->dev.of_node is not NULL, which means the of_node has gotten by regulator_of_get_init_data(), it needs to call of_node_put() to avoid refcount leak. 
Otherwise, only calling put_device() would lead memory leak of rdev in further: unreferenced object 0xffff88810d0b1000 (size 2048): comm "107-i2c-rtq6752", pid 48430, jiffies 4342258431 (age 1341.780s) backtrace: kmalloc_trace+0x22/0x110 regulator_register+0x184/0x1170 devm_regulator_register+0x4f/0xb0 When regulator_register() returns fail and goto `wash` symbol, rdev->dev has registered release() function, so directly call put_device() to cleanup everything. Fixes: d3c731564e09 ("regulator: plug of_node leak in regulator_register()'s error path") Signed-off-by: Zeng Heng Link: https://lore.kernel.org/r/20221116074339.1024240-1-zengheng4@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index bf8ba73d6c7c..f43c668e1630 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5401,11 +5401,15 @@ wash: mutex_lock(&regulator_list_mutex); regulator_ena_gpio_free(rdev); mutex_unlock(&regulator_list_mutex); + put_device(&rdev->dev); + rdev = NULL; clean: if (dangling_of_gpiod) gpiod_put(config->ena_gpiod); + if (rdev && rdev->dev.of_node) + of_node_put(rdev->dev.of_node); + kfree(rdev); kfree(config); - put_device(&rdev->dev); rinse: if (dangling_cfg_gpiod) gpiod_put(cfg->ena_gpiod); From fcb2d286362b19c07d5ea85878738e48422067ce Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Wed, 16 Nov 2022 17:32:04 +0800 Subject: [PATCH 0972/1477] spi: dw-dma: decrease reference count in dw_spi_dma_init_mfld() [ Upstream commit 804313b64e412a81b0b3389a10e7622452004aa6 ] pci_get_device() will increase the reference count for the returned pci_dev. Since 'dma_dev' is only used to filter the channel in dw_spi_dma_chan_filer() after using it we need to call pci_dev_put() to decrease the reference count. Also add pci_dev_put() for the error case.
Fixes: 7063c0d942a1 ("spi/dw_spi: add DMA support") Signed-off-by: Xiongfeng Wang Acked-by: Serge Semin Link: https://lore.kernel.org/r/20221116093204.46700-1-wangxiongfeng2@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-dw-dma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c index a09831c62192..32ac8f9068e8 100644 --- a/drivers/spi/spi-dw-dma.c +++ b/drivers/spi/spi-dw-dma.c @@ -127,12 +127,15 @@ static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws) dw_spi_dma_sg_burst_init(dws); + pci_dev_put(dma_dev); + return 0; free_rxchan: dma_release_channel(dws->rxchan); dws->rxchan = NULL; err_exit: + pci_dev_put(dma_dev); return -EBUSY; } From 0c059b7d2a6b7ddb75d9e579456f597ff697aca0 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 16 Nov 2022 11:37:06 +0800 Subject: [PATCH 0973/1477] regulator: core: fix UAF in destroy_regulator() [ Upstream commit 1f386d6894d0f1b7de8ef640c41622ddd698e7ab ] I got a UAF report as following: ================================================================== BUG: KASAN: use-after-free in __lock_acquire+0x935/0x2060 Read of size 8 at addr ffff88810e838220 by task python3/268 Call Trace: dump_stack_lvl+0x67/0x83 print_report+0x178/0x4b0 kasan_report+0x90/0x190 __lock_acquire+0x935/0x2060 lock_acquire+0x156/0x400 _raw_spin_lock+0x2a/0x40 lockref_get+0x11/0x30 simple_recursive_removal+0x41/0x440 debugfs_remove.part.12+0x32/0x50 debugfs_remove+0x29/0x30 _regulator_put.cold.54+0x3e/0x27f regulator_put+0x1f/0x30 release_nodes+0x6a/0xa0 devres_release_all+0xf8/0x150 Allocated by task 37: kasan_save_stack+0x1c/0x40 kasan_set_track+0x21/0x30 __kasan_slab_alloc+0x5d/0x70 slab_post_alloc_hook+0x62/0x510 kmem_cache_alloc_lru+0x222/0x5a0 __d_alloc+0x31/0x440 d_alloc+0x30/0xf0 d_alloc_parallel+0xc4/0xd20 __lookup_slow+0x15e/0x2f0 lookup_one_len+0x13a/0x150 start_creating+0xea/0x190 debugfs_create_dir+0x1e/0x210 
create_regulator+0x254/0x4e0 _regulator_get+0x2a1/0x467 _devm_regulator_get+0x5a/0xb0 regulator_virtual_probe+0xb9/0x1a0 Freed by task 30: kasan_save_stack+0x1c/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x50 __kasan_slab_free+0x102/0x190 kmem_cache_free+0xf6/0x600 rcu_core+0x54c/0x12b0 __do_softirq+0xf2/0x5e3 Last potentially related work creation: kasan_save_stack+0x1c/0x40 __kasan_record_aux_stack+0x98/0xb0 call_rcu+0x42/0x700 dentry_free+0x6c/0xd0 __dentry_kill+0x23b/0x2d0 dput.part.31+0x431/0x780 simple_recursive_removal+0xa9/0x440 debugfs_remove.part.12+0x32/0x50 debugfs_remove+0x29/0x30 regulator_unregister+0xe3/0x230 release_nodes+0x6a/0xa0 ================================================================== Here is how happened: processor A processor B regulator_register() rdev_init_debugfs() rdev->debugfs = debugfs_create_dir() devm_regulator_get() rdev = regulator_dev_lookup() create_regulator(rdev) // using rdev->debugfs as parent debugfs_create_dir(rdev->debugfs) mfd_remove_devices_fn() release_nodes() regulator_unregister() // free rdev->debugfs debugfs_remove_recursive(rdev->debugfs) release_nodes() destroy_regulator() debugfs_remove_recursive() <- causes UAF In devm_regulator_get(), after getting rdev, the refcount is get, so fix this by moving debugfs_remove_recursive() to regulator_dev_release(), then it can be protected by the refcount, the 'rdev->debugfs' can not be freed until the refcount is 0.
Fixes: 5de705194e98 ("regulator: Add basic per consumer debugfs") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221116033706.3595812-1-yangyingliang@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index f43c668e1630..eb083b26ab4f 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4928,6 +4928,7 @@ static void regulator_dev_release(struct device *dev) { struct regulator_dev *rdev = dev_get_drvdata(dev); + debugfs_remove_recursive(rdev->debugfs); kfree(rdev->constraints); of_node_put(rdev->dev.of_node); kfree(rdev); @@ -5438,7 +5439,6 @@ void regulator_unregister(struct regulator_dev *rdev) mutex_lock(&regulator_list_mutex); - debugfs_remove_recursive(rdev->debugfs); WARN_ON(rdev->open_count); regulator_remove_coupling(rdev); unset_regulator_supplies(rdev); From b76c5a99f44a9bcf78df7abf1fcb74dda6c1d10f Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 19:57:48 -0600 Subject: [PATCH 0974/1477] bus: sunxi-rsb: Support atomic transfers [ Upstream commit 077686da0e2162c4ea5ae0df205849c2a7a84479 ] When communicating with a PMIC during system poweroff (pm_power_off()), IRQs are disabled and we are in a RCU read-side critical section, so we cannot use wait_for_completion_io_timeout(). Instead, poll the status register for transfer completion.
Fixes: d787dcdb9c8f ("bus: sunxi-rsb: Add driver for Allwinner Reduced Serial Bus") Signed-off-by: Samuel Holland Reviewed-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221114015749.28490-3-samuel@sholland.org Signed-off-by: Jernej Skrabec Signed-off-by: Sasha Levin --- drivers/bus/sunxi-rsb.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 9b1a5e62417c..f8c29b888e6b 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -268,6 +268,9 @@ EXPORT_SYMBOL_GPL(sunxi_rsb_driver_register); /* common code that starts a transfer */ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) { + u32 int_mask, status; + bool timeout; + if (readl(rsb->regs + RSB_CTRL) & RSB_CTRL_START_TRANS) { dev_dbg(rsb->dev, "RSB transfer still in progress\n"); return -EBUSY; @@ -275,13 +278,23 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) reinit_completion(&rsb->complete); - writel(RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER, - rsb->regs + RSB_INTE); + int_mask = RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER; + writel(int_mask, rsb->regs + RSB_INTE); writel(RSB_CTRL_START_TRANS | RSB_CTRL_GLOBAL_INT_ENB, rsb->regs + RSB_CTRL); - if (!wait_for_completion_io_timeout(&rsb->complete, - msecs_to_jiffies(100))) { + if (irqs_disabled()) { + timeout = readl_poll_timeout_atomic(rsb->regs + RSB_INTS, + status, (status & int_mask), + 10, 100000); + writel(status, rsb->regs + RSB_INTS); + } else { + timeout = !wait_for_completion_io_timeout(&rsb->complete, + msecs_to_jiffies(100)); + status = rsb->status; + } + + if (timeout) { dev_dbg(rsb->dev, "RSB timeout\n"); /* abort the transfer */ @@ -293,18 +306,18 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) return -ETIMEDOUT; } - if (rsb->status & RSB_INTS_LOAD_BSY) { + if (status & RSB_INTS_LOAD_BSY) { dev_dbg(rsb->dev, "RSB busy\n"); return -EBUSY; } - if (rsb->status & 
RSB_INTS_TRANS_ERR) { - if (rsb->status & RSB_INTS_TRANS_ERR_ACK) { + if (status & RSB_INTS_TRANS_ERR) { + if (status & RSB_INTS_TRANS_ERR_ACK) { dev_dbg(rsb->dev, "RSB slave nack\n"); return -EINVAL; } - if (rsb->status & RSB_INTS_TRANS_ERR_DATA) { + if (status & RSB_INTS_TRANS_ERR_DATA) { dev_dbg(rsb->dev, "RSB transfer data error\n"); return -EIO; } From b547bf71fa7e35e22dd6494588a71ebd318aae09 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 9 Nov 2022 22:01:24 +0800 Subject: [PATCH 0975/1477] tee: optee: fix possible memory leak in optee_register_device() [ Upstream commit cce616e012c215d65c15e5d1afa73182dea49389 ] If device_register() returns error in optee_register_device(), the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(), and optee_device is freed in optee_release_device(). Fixes: c3fa24af9244 ("tee: optee: add TEE bus device enumeration support") Signed-off-by: Yang Yingliang Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander Signed-off-by: Sasha Levin --- drivers/tee/optee/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index 031806468af4..60ffc54da003 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -80,7 +80,7 @@ static int optee_register_device(const uuid_t *device_uuid) rc = device_register(&optee_device->dev); if (rc) { pr_err("device registration failed, err: %d\n", rc); - kfree(optee_device); + put_device(&optee_device->dev); } return rc; From 8124a02e17175e82e0e9350894775e5f36f3710e Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Mon, 14 Nov 2022 19:59:23 +0100 Subject: [PATCH 0976/1477] ARM: dts: at91: sam9g20ek: enable udc vbus gpio pinctrl [ Upstream commit 40a2226e8bfacb79dd154dea68febeead9d847e9 ] We set the PIOC to GPIO 
mode. This way the pin becomes an input signal will be usable by the controller. Without this change the udc on the 9g20ek does not work. Cc: nicolas.ferre@microchip.com Cc: ludovic.desroches@microchip.com Cc: alexandre.belloni@bootlin.com Cc: linux-arm-kernel@lists.infradead.org Cc: kernel@pengutronix.de Fixes: 5cb4e73575e3 ("ARM: at91: add at91sam9g20ek boards dt support") Signed-off-by: Michael Grzeschik Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221114185923.1023249-3-m.grzeschik@pengutronix.de Signed-off-by: Sasha Levin --- arch/arm/boot/dts/at91sam9g20ek_common.dtsi | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index ca03685f0f08..4783e657b4cb 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -39,6 +39,13 @@ }; + usb1 { + pinctrl_usb1_vbus_gpio: usb1_vbus_gpio { + atmel,pins = + ; /* PC5 GPIO */ + }; + }; + mmc0_slot1 { pinctrl_board_mmc0_slot1: mmc0_slot1-board { atmel,pins = @@ -84,6 +91,8 @@ }; usb1: gadget@fffa4000 { + pinctrl-0 = <&pinctrl_usb1_vbus_gpio>; + pinctrl-names = "default"; atmel,vbus-gpio = <&pioC 5 GPIO_ACTIVE_HIGH>; status = "okay"; }; From a2d5dba2fc694957d564944bb74671c68a2c4d24 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 15 Nov 2022 19:34:39 +0200 Subject: [PATCH 0977/1477] net: liquidio: simplify if expression [ Upstream commit 733d4bbf9514890eb53ebe75827bf1fb4fd25ebe ] Fix the warning reported by kbuild: cocci warnings: (new ones prefixed by >>) >> drivers/net/ethernet/cavium/liquidio/lio_main.c:1797:54-56: WARNING !A || A && B is equivalent to !A || B drivers/net/ethernet/cavium/liquidio/lio_main.c:1827:54-56: WARNING !A || A && B is equivalent to !A || B Fixes: 8979f428a4af ("net: liquidio: release resources when liquidio driver open failed") Reported-by: kernel test robot Signed-off-by: Leon Romanovsky Reviewed-by: Saeed Mahameed 
Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index c4dc6e2ccd6b..eefb25bcf57f 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1798,7 +1798,7 @@ static int liquidio_open(struct net_device *netdev) ifstate_set(lio, LIO_IFSTATE_RUNNING); - if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) { + if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) { ret = setup_tx_poll_fn(netdev); if (ret) goto err_poll; @@ -1828,7 +1828,7 @@ static int liquidio_open(struct net_device *netdev) return 0; err_rx_ctrl: - if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) + if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) cleanup_tx_poll_fn(netdev); err_poll: if (lio->ptp_clock) { From bddde342c62ee741a5417df6da93c0701b020e86 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 21 May 2022 08:45:15 +0100 Subject: [PATCH 0978/1477] rxrpc: Allow list of in-use local UDP endpoints to be viewed in /proc [ Upstream commit 33912c2639ad76660988c8ca97e4d18fca89b668 ] Allow the list of in-use local UDP endpoints in the current network namespace to be viewed in /proc. To aid with this, the endpoint list is converted to an hlist and RCU-safe manipulation is used so that the list can be read with only the RCU read lock held. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: David S. 
Miller Stable-dep-of: 3bcd6c7eaa53 ("rxrpc: Fix race between conn bundle lookup and bundle removal [ZDI-CAN-15975]") Signed-off-by: Sasha Levin --- net/rxrpc/ar-internal.h | 5 +-- net/rxrpc/local_object.c | 37 +++++++++++---------- net/rxrpc/net_ns.c | 5 ++- net/rxrpc/proc.c | 69 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+), 22 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ccb65412b670..2d0c797a176a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -86,7 +86,7 @@ struct rxrpc_net { struct work_struct client_conn_reaper; struct timer_list client_conn_reap_timer; - struct list_head local_endpoints; + struct hlist_head local_endpoints; struct mutex local_mutex; /* Lock for ->local_endpoints */ DECLARE_HASHTABLE (peer_hash, 10); @@ -266,7 +266,7 @@ struct rxrpc_local { atomic_t active_users; /* Number of users of the local endpoint */ atomic_t usage; /* Number of references to the structure */ struct rxrpc_net *rxnet; /* The network ns in which this resides */ - struct list_head link; + struct hlist_node link; struct socket *socket; /* my UDP socket */ struct work_struct processor; struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ @@ -1001,6 +1001,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *); extern const struct seq_operations rxrpc_call_seq_ops; extern const struct seq_operations rxrpc_connection_seq_ops; extern const struct seq_operations rxrpc_peer_seq_ops; +extern const struct seq_operations rxrpc_local_seq_ops; /* * recvmsg.c diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index ebbf1b03b62c..11db28a902f4 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -81,7 +81,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, atomic_set(&local->usage, 1); atomic_set(&local->active_users, 1); local->rxnet = rxnet; - INIT_LIST_HEAD(&local->link); + INIT_HLIST_NODE(&local->link); 
INIT_WORK(&local->processor, rxrpc_local_processor); init_rwsem(&local->defrag_sem); skb_queue_head_init(&local->reject_queue); @@ -199,7 +199,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, { struct rxrpc_local *local; struct rxrpc_net *rxnet = rxrpc_net(net); - struct list_head *cursor; + struct hlist_node *cursor; const char *age; long diff; int ret; @@ -209,16 +209,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, mutex_lock(&rxnet->local_mutex); - for (cursor = rxnet->local_endpoints.next; - cursor != &rxnet->local_endpoints; - cursor = cursor->next) { - local = list_entry(cursor, struct rxrpc_local, link); + hlist_for_each(cursor, &rxnet->local_endpoints) { + local = hlist_entry(cursor, struct rxrpc_local, link); diff = rxrpc_local_cmp_key(local, srx); - if (diff < 0) + if (diff != 0) continue; - if (diff > 0) - break; /* Services aren't allowed to share transport sockets, so * reject that here. It is possible that the object is dying - @@ -230,9 +226,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, goto addr_in_use; } - /* Found a match. We replace a dying object. Attempting to - * bind the transport socket may still fail if we're attempting - * to use a local address that the dying object is still using. + /* Found a match. We want to replace a dying object. + * Attempting to bind the transport socket may still fail if + * we're attempting to use a local address that the dying + * object is still using. 
*/ if (!rxrpc_use_local(local)) break; @@ -249,10 +246,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, if (ret < 0) goto sock_error; - if (cursor != &rxnet->local_endpoints) - list_replace_init(cursor, &local->link); - else - list_add_tail(&local->link, cursor); + if (cursor) { + hlist_replace_rcu(cursor, &local->link); + cursor->pprev = NULL; + } else { + hlist_add_head_rcu(&local->link, &rxnet->local_endpoints); + } age = "new"; found: @@ -393,7 +392,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) local->dead = true; mutex_lock(&rxnet->local_mutex); - list_del_init(&local->link); + hlist_del_init_rcu(&local->link); mutex_unlock(&rxnet->local_mutex); rxrpc_clean_up_local_conns(local); @@ -480,9 +479,9 @@ void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet) flush_workqueue(rxrpc_workqueue); - if (!list_empty(&rxnet->local_endpoints)) { + if (!hlist_empty(&rxnet->local_endpoints)) { mutex_lock(&rxnet->local_mutex); - list_for_each_entry(local, &rxnet->local_endpoints, link) { + hlist_for_each_entry(local, &rxnet->local_endpoints, link) { pr_err("AF_RXRPC: Leaked local %p {%d}\n", local, atomic_read(&local->usage)); } diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index cc7e30733feb..34f389975a7d 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -72,7 +72,7 @@ static __net_init int rxrpc_init_net(struct net *net) timer_setup(&rxnet->client_conn_reap_timer, rxrpc_client_conn_reap_timeout, 0); - INIT_LIST_HEAD(&rxnet->local_endpoints); + INIT_HLIST_HEAD(&rxnet->local_endpoints); mutex_init(&rxnet->local_mutex); hash_init(rxnet->peer_hash); @@ -98,6 +98,9 @@ static __net_init int rxrpc_init_net(struct net *net) proc_create_net("peers", 0444, rxnet->proc_net, &rxrpc_peer_seq_ops, sizeof(struct seq_net_private)); + proc_create_net("locals", 0444, rxnet->proc_net, + &rxrpc_local_seq_ops, + sizeof(struct seq_net_private)); return 0; err_proc: diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index e2f990754f88..8a8f776f91ae 
100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -334,3 +334,72 @@ const struct seq_operations rxrpc_peer_seq_ops = { .stop = rxrpc_peer_seq_stop, .show = rxrpc_peer_seq_show, }; + +/* + * Generate a list of extant virtual local endpoints in /proc/net/rxrpc/locals + */ +static int rxrpc_local_seq_show(struct seq_file *seq, void *v) +{ + struct rxrpc_local *local; + char lbuff[50]; + + if (v == SEQ_START_TOKEN) { + seq_puts(seq, + "Proto Local " + " Use Act\n"); + return 0; + } + + local = hlist_entry(v, struct rxrpc_local, link); + + sprintf(lbuff, "%pISpc", &local->srx.transport); + + seq_printf(seq, + "UDP %-47.47s %3u %3u\n", + lbuff, + atomic_read(&local->usage), + atomic_read(&local->active_users)); + + return 0; +} + +static void *rxrpc_local_seq_start(struct seq_file *seq, loff_t *_pos) + __acquires(rcu) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + unsigned int n; + + rcu_read_lock(); + + if (*_pos >= UINT_MAX) + return NULL; + + n = *_pos; + if (n == 0) + return SEQ_START_TOKEN; + + return seq_hlist_start_rcu(&rxnet->local_endpoints, n - 1); +} + +static void *rxrpc_local_seq_next(struct seq_file *seq, void *v, loff_t *_pos) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + if (*_pos >= UINT_MAX) + return NULL; + + return seq_hlist_next_rcu(v, &rxnet->local_endpoints, _pos); +} + +static void rxrpc_local_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) +{ + rcu_read_unlock(); +} + +const struct seq_operations rxrpc_local_seq_ops = { + .start = rxrpc_local_seq_start, + .next = rxrpc_local_seq_next, + .stop = rxrpc_local_seq_stop, + .show = rxrpc_local_seq_show, +}; From 23c03ee0eec4424a54a5619b23954d9069bac026 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 21 May 2022 08:45:22 +0100 Subject: [PATCH 0979/1477] rxrpc: Use refcount_t rather than atomic_t [ Upstream commit a05754295e01f006a651eec759c5dbe682ef6cef ] Move to using refcount_t rather than atomic_t for refcounts in rxrpc. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: David S. Miller Stable-dep-of: 3bcd6c7eaa53 ("rxrpc: Fix race between conn bundle lookup and bundle removal [ZDI-CAN-15975]") Signed-off-by: Sasha Levin --- include/trace/events/rxrpc.h | 2 +- net/rxrpc/af_rxrpc.c | 2 +- net/rxrpc/ar-internal.h | 18 ++++--------- net/rxrpc/call_accept.c | 4 +-- net/rxrpc/call_object.c | 44 ++++++++++++++++---------------- net/rxrpc/conn_client.c | 30 +++++++++++----------- net/rxrpc/conn_object.c | 49 ++++++++++++++++++------------------ net/rxrpc/conn_service.c | 8 +++--- net/rxrpc/input.c | 4 +-- net/rxrpc/local_object.c | 31 ++++++++++++----------- net/rxrpc/peer_object.c | 40 +++++++++++++++-------------- net/rxrpc/proc.c | 8 +++--- net/rxrpc/skbuff.c | 1 - 13 files changed, 119 insertions(+), 122 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 1c714336b863..221856f2d295 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -583,7 +583,7 @@ TRACE_EVENT(rxrpc_client, TP_fast_assign( __entry->conn = conn ? conn->debug_id : 0; __entry->channel = channel; - __entry->usage = conn ? atomic_read(&conn->usage) : -2; + __entry->usage = conn ? refcount_read(&conn->ref) : -2; __entry->op = op; __entry->cid = conn ? 
conn->proto.cid : 0; ), diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 41671af6b33f..0354f90dc93a 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -351,7 +351,7 @@ static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall, */ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { - _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); + _enter("%d{%d}", call->debug_id, refcount_read(&call->ref)); mutex_lock(&call->user_mutex); rxrpc_release_call(rxrpc_sk(sock->sk), call); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 2d0c797a176a..08552ad82f50 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -14,14 +14,6 @@ #include #include "protocol.h" -#if 0 -#define CHECK_SLAB_OKAY(X) \ - BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \ - (POISON_FREE << 8 | POISON_FREE)) -#else -#define CHECK_SLAB_OKAY(X) do {} while (0) -#endif - #define FCRYPT_BSIZE 8 struct rxrpc_crypt { union { @@ -264,7 +256,7 @@ struct rxrpc_security { struct rxrpc_local { struct rcu_head rcu; atomic_t active_users; /* Number of users of the local endpoint */ - atomic_t usage; /* Number of references to the structure */ + refcount_t ref; /* Number of references to the structure */ struct rxrpc_net *rxnet; /* The network ns in which this resides */ struct hlist_node link; struct socket *socket; /* my UDP socket */ @@ -289,7 +281,7 @@ struct rxrpc_local { */ struct rxrpc_peer { struct rcu_head rcu; /* This must be first */ - atomic_t usage; + refcount_t ref; unsigned long hash_key; struct hlist_node hash_link; struct rxrpc_local *local; @@ -391,7 +383,7 @@ enum rxrpc_conn_proto_state { */ struct rxrpc_bundle { struct rxrpc_conn_parameters params; - atomic_t usage; + refcount_t ref; unsigned int debug_id; bool try_upgrade; /* True if the bundle is attempting upgrade */ bool alloc_conn; /* True if someone's getting a conn */ @@ -412,7 +404,7 @@ struct rxrpc_connection { struct 
rxrpc_conn_proto proto; struct rxrpc_conn_parameters params; - atomic_t usage; + refcount_t ref; struct rcu_head rcu; struct list_head cache_link; @@ -592,7 +584,7 @@ struct rxrpc_call { int error; /* Local error incurred */ enum rxrpc_call_state state; /* current state of call */ enum rxrpc_call_completion completion; /* Call completion condition */ - atomic_t usage; + refcount_t ref; u16 service_id; /* service ID */ u8 security_ix; /* Security type */ enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a0b033954cea..2a14d69b171f 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -91,7 +91,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, (head + 1) & (size - 1)); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, - atomic_read(&conn->usage), here); + refcount_read(&conn->ref), here); } /* Now it gets complicated, because calls get registered with the @@ -104,7 +104,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, call->state = RXRPC_CALL_SERVER_PREALLOC; trace_rxrpc_call(call->debug_id, rxrpc_call_new_service, - atomic_read(&call->usage), + refcount_read(&call->ref), here, (const void *)user_call_ID); write_lock(&rx->call_lock); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 150cd7b2154c..10dad2834d5b 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -112,7 +112,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, found_extant_call: rxrpc_get_call(call, rxrpc_call_got); read_unlock(&rx->call_lock); - _leave(" = %p [%d]", call, atomic_read(&call->usage)); + _leave(" = %p [%d]", call, refcount_read(&call->ref)); return call; } @@ -160,7 +160,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, spin_lock_init(&call->notify_lock); spin_lock_init(&call->input_lock); rwlock_init(&call->state_lock); - 
atomic_set(&call->usage, 1); + refcount_set(&call->ref, 1); call->debug_id = debug_id; call->tx_total_len = -1; call->next_rx_timo = 20 * HZ; @@ -301,7 +301,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, call->interruptibility = p->interruptibility; call->tx_total_len = p->tx_total_len; trace_rxrpc_call(call->debug_id, rxrpc_call_new_client, - atomic_read(&call->usage), + refcount_read(&call->ref), here, (const void *)p->user_call_ID); if (p->kernel) __set_bit(RXRPC_CALL_KERNEL, &call->flags); @@ -354,7 +354,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, goto error_attached_to_socket; trace_rxrpc_call(call->debug_id, rxrpc_call_connected, - atomic_read(&call->usage), here, NULL); + refcount_read(&call->ref), here, NULL); rxrpc_start_call_timer(call); @@ -374,7 +374,7 @@ error_dup_user_ID: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, -EEXIST); trace_rxrpc_call(call->debug_id, rxrpc_call_error, - atomic_read(&call->usage), here, ERR_PTR(-EEXIST)); + refcount_read(&call->ref), here, ERR_PTR(-EEXIST)); rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); @@ -388,7 +388,7 @@ error_dup_user_ID: */ error_attached_to_socket: trace_rxrpc_call(call->debug_id, rxrpc_call_error, - atomic_read(&call->usage), here, ERR_PTR(ret)); + refcount_read(&call->ref), here, ERR_PTR(ret)); set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, ret); @@ -444,8 +444,9 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, bool rxrpc_queue_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); - int n = atomic_fetch_add_unless(&call->usage, 1, 0); - if (n == 0) + int n; + + if (!__refcount_inc_not_zero(&call->ref, &n)) return false; if (rxrpc_queue_work(&call->processor)) trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1, @@ -461,7 +462,7 @@ bool rxrpc_queue_call(struct rxrpc_call 
*call) bool __rxrpc_queue_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); - int n = atomic_read(&call->usage); + int n = refcount_read(&call->ref); ASSERTCMP(n, >=, 1); if (rxrpc_queue_work(&call->processor)) trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n, @@ -478,7 +479,7 @@ void rxrpc_see_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); if (call) { - int n = atomic_read(&call->usage); + int n = refcount_read(&call->ref); trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n, here, NULL); @@ -488,11 +489,11 @@ void rxrpc_see_call(struct rxrpc_call *call) bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_fetch_add_unless(&call->usage, 1, 0); + int n; - if (n == 0) + if (!__refcount_inc_not_zero(&call->ref, &n)) return false; - trace_rxrpc_call(call->debug_id, op, n, here, NULL); + trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); return true; } @@ -502,9 +503,10 @@ bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&call->usage); + int n; - trace_rxrpc_call(call->debug_id, op, n, here, NULL); + __refcount_inc(&call->ref, &n); + trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); } /* @@ -529,10 +531,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) struct rxrpc_connection *conn = call->conn; bool put = false; - _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); + _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); trace_rxrpc_call(call->debug_id, rxrpc_call_release, - atomic_read(&call->usage), + refcount_read(&call->ref), here, (const void *)call->flags); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); @@ -621,14 +623,14 @@ void rxrpc_put_call(struct rxrpc_call *call, enum 
rxrpc_call_trace op) struct rxrpc_net *rxnet = call->rxnet; const void *here = __builtin_return_address(0); unsigned int debug_id = call->debug_id; + bool dead; int n; ASSERT(call != NULL); - n = atomic_dec_return(&call->usage); + dead = __refcount_dec_and_test(&call->ref, &n); trace_rxrpc_call(debug_id, op, n, here, NULL); - ASSERTCMP(n, >=, 0); - if (n == 0) { + if (dead) { _debug("call %d dead", call->debug_id); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); @@ -718,7 +720,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) list_del_init(&call->link); pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), + call, refcount_read(&call->ref), rxrpc_call_states[call->state], call->flags, call->events); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index f5fb223aba82..6e2ffafcc98d 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -102,7 +102,7 @@ void rxrpc_destroy_client_conn_ids(void) if (!idr_is_empty(&rxrpc_client_conn_ids)) { idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) { pr_err("AF_RXRPC: Leaked client conn %p {%d}\n", - conn, atomic_read(&conn->usage)); + conn, refcount_read(&conn->ref)); } BUG(); } @@ -122,7 +122,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, if (bundle) { bundle->params = *cp; rxrpc_get_peer(bundle->params.peer); - atomic_set(&bundle->usage, 1); + refcount_set(&bundle->ref, 1); spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); } @@ -131,7 +131,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle) { - atomic_inc(&bundle->usage); + refcount_inc(&bundle->ref); return bundle; } @@ -144,10 +144,13 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) void rxrpc_put_bundle(struct rxrpc_bundle *bundle) { unsigned int d = bundle->debug_id; - unsigned int u = atomic_dec_return(&bundle->usage); + 
bool dead; + int r; - _debug("PUT B=%x %u", d, u); - if (u == 0) + dead = __refcount_dec_and_test(&bundle->ref, &r); + + _debug("PUT B=%x %d", d, r); + if (dead) rxrpc_free_bundle(bundle); } @@ -169,7 +172,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) return ERR_PTR(-ENOMEM); } - atomic_set(&conn->usage, 1); + refcount_set(&conn->ref, 1); conn->bundle = bundle; conn->params = bundle->params; conn->out_clientflag = RXRPC_CLIENT_INITIATED; @@ -199,7 +202,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) key_get(conn->params.key); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client, - atomic_read(&conn->usage), + refcount_read(&conn->ref), __builtin_return_address(0)); atomic_inc(&rxnet->nr_client_conns); @@ -972,14 +975,13 @@ void rxrpc_put_client_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); unsigned int debug_id = conn->debug_id; - int n; + bool dead; + int r; - n = atomic_dec_return(&conn->usage); - trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here); - if (n <= 0) { - ASSERTCMP(n, >=, 0); + dead = __refcount_dec_and_test(&conn->ref, &r); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here); + if (dead) rxrpc_kill_client_conn(conn); - } } /* diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 3ef05a0e90ad..d829b97550cc 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -105,7 +105,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, goto not_found; *_peer = peer; conn = rxrpc_find_service_conn_rcu(peer, skb); - if (!conn || atomic_read(&conn->usage) == 0) + if (!conn || refcount_read(&conn->ref) == 0) goto not_found; _leave(" = %p", conn); return conn; @@ -115,7 +115,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, */ conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); - if (!conn || atomic_read(&conn->usage) == 0) { + if (!conn || 
refcount_read(&conn->ref) == 0) { _debug("no conn"); goto not_found; } @@ -264,11 +264,12 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn) bool rxrpc_queue_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); - int n = atomic_fetch_add_unless(&conn->usage, 1, 0); - if (n == 0) + int r; + + if (!__refcount_inc_not_zero(&conn->ref, &r)) return false; if (rxrpc_queue_work(&conn->processor)) - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here); else rxrpc_put_connection(conn); return true; @@ -281,7 +282,7 @@ void rxrpc_see_connection(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); if (conn) { - int n = atomic_read(&conn->usage); + int n = refcount_read(&conn->ref); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here); } @@ -293,9 +294,10 @@ void rxrpc_see_connection(struct rxrpc_connection *conn) struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&conn->usage); + int r; - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here); + __refcount_inc(&conn->ref, &r); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here); return conn; } @@ -306,11 +308,11 @@ struct rxrpc_connection * rxrpc_get_connection_maybe(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); + int r; if (conn) { - int n = atomic_fetch_add_unless(&conn->usage, 1, 0); - if (n > 0) - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here); + if (__refcount_inc_not_zero(&conn->ref, &r)) + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here); else conn = NULL; } @@ -334,12 +336,11 @@ void rxrpc_put_service_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); unsigned int debug_id = conn->debug_id; - int n; + int r; - n = atomic_dec_return(&conn->usage); - 
trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here); - ASSERTCMP(n, >=, 0); - if (n == 1) + __refcount_dec(&conn->ref, &r); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here); + if (r - 1 == 1) rxrpc_set_service_reap_timer(conn->params.local->rxnet, jiffies + rxrpc_connection_expiry); } @@ -352,9 +353,9 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) struct rxrpc_connection *conn = container_of(rcu, struct rxrpc_connection, rcu); - _enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage)); + _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref)); - ASSERTCMP(atomic_read(&conn->usage), ==, 0); + ASSERTCMP(refcount_read(&conn->ref), ==, 0); _net("DESTROY CONN %d", conn->debug_id); @@ -394,8 +395,8 @@ void rxrpc_service_connection_reaper(struct work_struct *work) write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { - ASSERTCMP(atomic_read(&conn->usage), >, 0); - if (likely(atomic_read(&conn->usage) > 1)) + ASSERTCMP(refcount_read(&conn->ref), >, 0); + if (likely(refcount_read(&conn->ref) > 1)) continue; if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; @@ -407,7 +408,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; _debug("reap CONN %d { u=%d,t=%ld }", - conn->debug_id, atomic_read(&conn->usage), + conn->debug_id, refcount_read(&conn->ref), (long)expire_at - (long)now); if (time_before(now, expire_at)) { @@ -420,7 +421,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) /* The usage count sits at 1 whilst the object is unused on the * list; we reduce that to 0 to make the object unavailable. 
*/ - if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) + if (!refcount_dec_if_one(&conn->ref)) continue; trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL); @@ -444,7 +445,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) link); list_del_init(&conn->link); - ASSERTCMP(atomic_read(&conn->usage), ==, 0); + ASSERTCMP(refcount_read(&conn->ref), ==, 0); rxrpc_kill_connection(conn); } @@ -472,7 +473,7 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { pr_err("AF_RXRPC: Leaked conn %p {%d}\n", - conn, atomic_read(&conn->usage)); + conn, refcount_read(&conn->ref)); leak = true; } write_unlock(&rxnet->conn_lock); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 6c847720494f..68508166bbc0 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -9,7 +9,7 @@ #include "ar-internal.h" static struct rxrpc_bundle rxrpc_service_dummy_bundle = { - .usage = ATOMIC_INIT(1), + .ref = REFCOUNT_INIT(1), .debug_id = UINT_MAX, .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock), }; @@ -99,7 +99,7 @@ conn_published: return; found_extant_conn: - if (atomic_read(&cursor->usage) == 0) + if (refcount_read(&cursor->ref) == 0) goto replace_old_connection; write_sequnlock_bh(&peer->service_conn_lock); /* We should not be able to get here. rxrpc_incoming_connection() is @@ -132,7 +132,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn * the rxrpc_connections list. 
*/ conn->state = RXRPC_CONN_SERVICE_PREALLOC; - atomic_set(&conn->usage, 2); + refcount_set(&conn->ref, 2); conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle); atomic_inc(&rxnet->nr_conns); @@ -142,7 +142,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn write_unlock(&rxnet->conn_lock); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, - atomic_read(&conn->usage), + refcount_read(&conn->ref), __builtin_return_address(0)); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 1145cb14d86f..e9178115a744 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1163,8 +1163,6 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, */ static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) { - CHECK_SLAB_OKAY(&local->usage); - if (rxrpc_get_local_maybe(local)) { skb_queue_tail(&local->reject_queue, skb); rxrpc_queue_local(local); @@ -1422,7 +1420,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) } } - if (!call || atomic_read(&call->usage) == 0) { + if (!call || refcount_read(&call->ref) == 0) { if (rxrpc_to_client(sp) || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) goto bad_message; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 11db28a902f4..2c66ee981f39 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -78,7 +78,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); if (local) { - atomic_set(&local->usage, 1); + refcount_set(&local->ref, 1); atomic_set(&local->active_users, 1); local->rxnet = rxnet; INIT_HLIST_NODE(&local->link); @@ -284,10 +284,10 @@ addr_in_use: struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); - int n; + int r; - n = atomic_inc_return(&local->usage); - trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here); + __refcount_inc(&local->ref, &r); + 
trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here); return local; } @@ -297,12 +297,12 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); + int r; if (local) { - int n = atomic_fetch_add_unless(&local->usage, 1, 0); - if (n > 0) + if (__refcount_inc_not_zero(&local->ref, &r)) trace_rxrpc_local(local->debug_id, rxrpc_local_got, - n + 1, here); + r + 1, here); else local = NULL; } @@ -316,10 +316,10 @@ void rxrpc_queue_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); unsigned int debug_id = local->debug_id; - int n = atomic_read(&local->usage); + int r = refcount_read(&local->ref); if (rxrpc_queue_work(&local->processor)) - trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here); + trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here); else rxrpc_put_local(local); } @@ -331,15 +331,16 @@ void rxrpc_put_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); unsigned int debug_id; - int n; + bool dead; + int r; if (local) { debug_id = local->debug_id; - n = atomic_dec_return(&local->usage); - trace_rxrpc_local(debug_id, rxrpc_local_put, n, here); + dead = __refcount_dec_and_test(&local->ref, &r); + trace_rxrpc_local(debug_id, rxrpc_local_put, r, here); - if (n == 0) + if (dead) call_rcu(&local->rcu, rxrpc_local_rcu); } } @@ -427,7 +428,7 @@ static void rxrpc_local_processor(struct work_struct *work) return; trace_rxrpc_local(local->debug_id, rxrpc_local_processing, - atomic_read(&local->usage), NULL); + refcount_read(&local->ref), NULL); do { again = false; @@ -483,7 +484,7 @@ void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet) mutex_lock(&rxnet->local_mutex); hlist_for_each_entry(local, &rxnet->local_endpoints, link) { pr_err("AF_RXRPC: Leaked local %p {%d}\n", - local, atomic_read(&local->usage)); + local, refcount_read(&local->ref)); } 
mutex_unlock(&rxnet->local_mutex); BUG(); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 0298fe2ad6d3..26d2ae9baaf2 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -121,7 +121,7 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu( hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) { if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 && - atomic_read(&peer->usage) > 0) + refcount_read(&peer->ref) > 0) return peer; } @@ -140,7 +140,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); if (peer) { _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); - _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); + _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); } return peer; } @@ -216,7 +216,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) peer = kzalloc(sizeof(struct rxrpc_peer), gfp); if (peer) { - atomic_set(&peer->usage, 1); + refcount_set(&peer->ref, 1); peer->local = rxrpc_get_local(local); INIT_HLIST_HEAD(&peer->error_targets); peer->service_conns = RB_ROOT; @@ -378,7 +378,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); - _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); + _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); return peer; } @@ -388,10 +388,10 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); - int n; + int r; - n = atomic_inc_return(&peer->usage); - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here); + __refcount_inc(&peer->ref, &r); + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); return peer; } @@ -401,11 +401,11 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) struct rxrpc_peer *rxrpc_get_peer_maybe(struct 
rxrpc_peer *peer) { const void *here = __builtin_return_address(0); + int r; if (peer) { - int n = atomic_fetch_add_unless(&peer->usage, 1, 0); - if (n > 0) - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here); + if (__refcount_inc_not_zero(&peer->ref, &r)) + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); else peer = NULL; } @@ -436,13 +436,14 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); unsigned int debug_id; - int n; + bool dead; + int r; if (peer) { debug_id = peer->debug_id; - n = atomic_dec_return(&peer->usage); - trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); - if (n == 0) + dead = __refcount_dec_and_test(&peer->ref, &r); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + if (dead) __rxrpc_put_peer(peer); } } @@ -455,11 +456,12 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); unsigned int debug_id = peer->debug_id; - int n; + bool dead; + int r; - n = atomic_dec_return(&peer->usage); - trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); - if (n == 0) { + dead = __refcount_dec_and_test(&peer->ref, &r); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + if (dead) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); rxrpc_free_peer(peer); @@ -481,7 +483,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) { pr_err("Leaked peer %u {%u} %pISp\n", peer->debug_id, - atomic_read(&peer->usage), + refcount_read(&peer->ref), &peer->srx.transport); } } diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 8a8f776f91ae..8967201fd8e5 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -107,7 +107,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call->cid, call->call_id, rxrpc_is_service_call(call) ? 
"Svc" : "Clt", - atomic_read(&call->usage), + refcount_read(&call->ref), rxrpc_call_states[call->state], call->abort_code, call->debug_id, @@ -189,7 +189,7 @@ print: conn->service_id, conn->proto.cid, rxrpc_conn_is_service(conn) ? "Svc" : "Clt", - atomic_read(&conn->usage), + refcount_read(&conn->ref), rxrpc_conn_states[conn->state], key_serial(conn->params.key), atomic_read(&conn->serial), @@ -239,7 +239,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) " %3u %5u %6llus %8u %8u\n", lbuff, rbuff, - atomic_read(&peer->usage), + refcount_read(&peer->ref), peer->cong_cwnd, peer->mtu, now - peer->last_tx_at, @@ -357,7 +357,7 @@ static int rxrpc_local_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "UDP %-47.47s %3u %3u\n", lbuff, - atomic_read(&local->usage), + refcount_read(&local->ref), atomic_read(&local->active_users)); return 0; diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 0348d2bf6f7d..580a5acffee7 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -71,7 +71,6 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) const void *here = __builtin_return_address(0); if (skb) { int n; - CHECK_SLAB_OKAY(&skb->users); n = atomic_dec_return(select_skb_count(skb)); trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, rxrpc_skb(skb)->rx_flags, here); From 3535c632e6d16c98f76e615da8dc0cb2750c66cc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 16 Nov 2022 14:02:28 +0000 Subject: [PATCH 0980/1477] rxrpc: Fix race between conn bundle lookup and bundle removal [ZDI-CAN-15975] [ Upstream commit 3bcd6c7eaa53b56c3f584da46a1f7652e759d0e5 ] After rxrpc_unbundle_conn() has removed a connection from a bundle, it checks to see if there are any conns with available channels and, if not, removes and attempts to destroy the bundle. 
Whilst it does check after grabbing client_bundles_lock that there are no connections attached, this races with rxrpc_look_up_bundle() retrieving the bundle without yet attaching a connection to it, the connection only being attached later. There is therefore a window in which the bundle can get destroyed before we manage to attach a new connection to it. Fix this by adding an "active" counter to struct rxrpc_bundle: (1) rxrpc_connect_call() obtains an active count by prepping/looking up a bundle and ditches it before returning. (2) If, during rxrpc_connect_call(), a connection is added to the bundle, this obtains an active count, which is held until the connection is discarded. (3) rxrpc_deactivate_bundle() is created to drop an active count on a bundle and destroy it when the active count reaches 0. The active count is checked with client_bundles_lock held to prevent a race with rxrpc_look_up_bundle(). (4) rxrpc_unbundle_conn() then calls rxrpc_deactivate_bundle(). Fixes: 245500d853e9 ("rxrpc: Rewrite the client connection manager") Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-15975 Signed-off-by: David Howells Tested-by: zdi-disclosures@trendmicro.com cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/conn_client.c | 38 +++++++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 08552ad82f50..d86894a1c35d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -384,6 +384,7 @@ enum rxrpc_conn_proto_state { struct rxrpc_bundle { struct rxrpc_conn_parameters params; refcount_t ref; + atomic_t active; /* Number of active users */ unsigned int debug_id; bool try_upgrade; /* True if the bundle is attempting upgrade */ bool alloc_conn; /* True if someone's getting a conn */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 6e2ffafcc98d..f5fa5f3083bd 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -40,6 +40,8 @@ __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. 
The @@ -123,6 +125,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, bundle->params = *cp; rxrpc_get_peer(bundle->params.peer); refcount_set(&bundle->ref, 1); + atomic_set(&bundle->active, 1); spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); } @@ -149,7 +152,7 @@ void rxrpc_put_bundle(struct rxrpc_bundle *bundle) dead = __refcount_dec_and_test(&bundle->ref, &r); - _debug("PUT B=%x %d", d, r); + _debug("PUT B=%x %d", d, r - 1); if (dead) rxrpc_free_bundle(bundle); } @@ -344,6 +347,7 @@ found_bundle_free: rxrpc_free_bundle(candidate); found_bundle: rxrpc_get_bundle(bundle); + atomic_inc(&bundle->active); spin_unlock(&local->client_bundles_lock); _leave(" = %u [found]", bundle->debug_id); return bundle; @@ -441,6 +445,7 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) if (old) trace_rxrpc_client(old, -1, rxrpc_client_replace); candidate->bundle_shift = shift; + atomic_inc(&bundle->active); bundle->conns[i] = candidate; for (j = 0; j < RXRPC_MAXCALLS; j++) set_bit(shift + j, &bundle->avail_chans); @@ -731,6 +736,7 @@ granted_channel: smp_rmb(); out_put_bundle: + rxrpc_deactivate_bundle(bundle); rxrpc_put_bundle(bundle); out: _leave(" = %d", ret); @@ -906,9 +912,8 @@ out: static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) { struct rxrpc_bundle *bundle = conn->bundle; - struct rxrpc_local *local = bundle->params.local; unsigned int bindex; - bool need_drop = false, need_put = false; + bool need_drop = false; int i; _enter("C=%x", conn->debug_id); @@ -927,15 +932,22 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) } spin_unlock(&bundle->channel_lock); - /* If there are no more connections, remove the bundle */ - if (!bundle->avail_chans) { - _debug("maybe unbundle"); - spin_lock(&local->client_bundles_lock); + if (need_drop) { + rxrpc_deactivate_bundle(bundle); + rxrpc_put_connection(conn); + } +} - for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) - 
if (bundle->conns[i]) - break; - if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) { +/* + * Drop the active count on a bundle. + */ +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) +{ + struct rxrpc_local *local = bundle->params.local; + bool need_put = false; + + if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { + if (!bundle->params.exclusive) { _debug("erase bundle"); rb_erase(&bundle->local_node, &local->client_bundles); need_put = true; @@ -945,10 +957,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) if (need_put) rxrpc_put_bundle(bundle); } - - if (need_drop) - rxrpc_put_connection(conn); - _leave(""); } /* From 398a860a44297245ed4cb7ff59b37fef89c379f8 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 16 Nov 2022 21:02:49 +0800 Subject: [PATCH 0981/1477] nfc/nci: fix race with opening and closing [ Upstream commit 0ad6bded175e829c2ca261529c9dce39a32a042d ] Previously we leverage NCI_UNREG and the lock inside nci_close_device to prevent the race condition between opening a device and closing a device. However, it still has problem because a failed opening command will erase the NCI_UNREG flag and allow another opening command to bypass the status checking. This fix corrects that by making sure the NCI_UNREG is held. Reported-by: syzbot+43475bf3cfbd6e41f5b7@syzkaller.appspotmail.com Fixes: 48b71a9e66c2 ("NFC: add NCI_UNREG flag to eliminate the race") Signed-off-by: Lin Ma Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/nfc/nci/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 2cfff70f70e0..ed9019d807c7 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -530,7 +530,7 @@ static int nci_open_device(struct nci_dev *ndev) skb_queue_purge(&ndev->tx_q); ndev->ops->close(ndev); - ndev->flags = 0; + ndev->flags &= BIT(NCI_UNREG); } done: From 26bb8f6aaae303d890265f38c51a3f378d4a57af Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 17 Nov 2022 14:55:27 +0800 Subject: [PATCH 0982/1477] net: pch_gbe: fix potential memleak in pch_gbe_tx_queue() [ Upstream commit 2360f9b8c4e81d242d4cbf99d630a2fffa681fab ] In pch_gbe_xmit_frame(), NETDEV_TX_OK will be returned whether pch_gbe_tx_queue() sends data successfully or not, so pch_gbe_tx_queue() needs to free skb before returning. But pch_gbe_tx_queue() returns without freeing skb in case of dma_map_single() fails. Add dev_kfree_skb_any() to fix it. Fixes: 77555ee72282 ("net: Add Gigabit Ethernet driver of Topcliff PCH") Signed-off-by: Wang Hai Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 2942102efd48..3361166e56de 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1166,6 +1166,7 @@ static void pch_gbe_tx_queue(struct pch_gbe_adapter *adapter, buffer_info->dma = 0; buffer_info->time_stamp = 0; tx_ring->next_to_use = ring_num; + dev_kfree_skb_any(skb); return; } buffer_info->mapped = true; From 459332f8dbfb99d9fb95c0f37c7015d36687fc6d Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 10 Nov 2022 20:26:06 +0800 Subject: [PATCH 0983/1477] 9p/fd: fix issue of list_del corruption in p9_fd_cancel() [ Upstream commit 11c10956515b8ec44cf4f2a7b9d8bf8b9dc05ec4 ] Syz reported the following issue: kernel BUG at lib/list_debug.c:53! invalid opcode: 0000 [#1] PREEMPT SMP KASAN RIP: 0010:__list_del_entry_valid.cold+0x5c/0x72 Call Trace: p9_fd_cancel+0xb1/0x270 p9_client_rpc+0x8ea/0xba0 p9_client_create+0x9c0/0xed0 v9fs_session_init+0x1e0/0x1620 v9fs_mount+0xba/0xb80 legacy_get_tree+0x103/0x200 vfs_get_tree+0x89/0x2d0 path_mount+0x4c0/0x1ac0 __x64_sys_mount+0x33b/0x430 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The process is as follows: Thread A: Thread B: p9_poll_workfn() p9_client_create() ... ... p9_conn_cancel() p9_fd_cancel() list_del() ... ... list_del() //list_del corruption There is no lock protection when deleting list in p9_conn_cancel(). After deleting list in Thread A, thread B will delete the same list again. It will cause issue of list_del corruption. Setting req->status to REQ_STATUS_ERROR under lock prevents other cleanup paths from trying to manipulate req_list. The other thread can safely check req->status because it still holds a reference to req at this point. 
Link: https://lkml.kernel.org/r/20221110122606.383352-1-shaozhengchao@huawei.com Fixes: 52f1c45dde91 ("9p: trans_fd/p9_conn_cancel: drop client lock earlier") Reported-by: syzbot+9b69b8d10ab4a7d88056@syzkaller.appspotmail.com Signed-off-by: Zhengchao Shao [Dominique: add description of the fix in commit message] Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin --- net/9p/trans_fd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index fec6c800c898..400219801e63 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -200,9 +200,11 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); + req->status = REQ_STATUS_ERROR; } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); + req->status = REQ_STATUS_ERROR; } spin_unlock(&m->client->lock); From 5c97af75f53c626283afd8a800a4bd57614f761f Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 9 Nov 2022 12:39:07 -0700 Subject: [PATCH 0984/1477] netfilter: conntrack: Fix data-races around ct mark [ Upstream commit 52d1aa8b8249ff477aaa38b6f74a8ced780d079c ] nf_conn:mark can be read from and written to in parallel. Use READ_ONCE()/WRITE_ONCE() for reads and writes to prevent unwanted compiler optimizations. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Daniel Xu Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/core/flow_dissector.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 4 ++-- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 24 ++++++++++++++---------- net/netfilter/nf_conntrack_standalone.c | 2 +- net/netfilter/nft_ct.c | 6 +++--- net/netfilter/xt_connmark.c | 18 ++++++++++-------- net/openvswitch/conntrack.c | 8 ++++---- net/sched/act_connmark.c | 4 ++-- net/sched/act_ct.c | 8 ++++---- net/sched/act_ctinfo.c | 6 +++--- 11 files changed, 45 insertions(+), 39 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index ed120828c7e2..b8d082f55718 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -263,7 +263,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, key->ct_zone = ct->zone.id; #endif #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - key->ct_mark = ct->mark; + key->ct_mark = READ_ONCE(ct->mark); #endif cl = nf_ct_labels_find(ct); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1088564d4dbc..77e3b67e8790 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -424,7 +424,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) switch (ctinfo) { case IP_CT_NEW: - ct->mark = hash; + WRITE_ONCE(ct->mark, hash); break; case IP_CT_RELATED: case IP_CT_RELATED_REPLY: @@ -441,7 +441,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) #ifdef DEBUG nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); + pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark)); if (!clusterip_responsible(cipinfo->config, hash)) { pr_debug("not responsible\n"); return NF_DROP; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8369af0c50ea..193a18bfddc0 100644 --- 
a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1598,7 +1598,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } #ifdef CONFIG_NF_CONNTRACK_MARK - ct->mark = exp->master->mark; + ct->mark = READ_ONCE(exp->master->mark); #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK ct->secmark = exp->master->secmark; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9e6898164199..c402283e7545 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -317,9 +317,9 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_MARK -static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_mark(struct sk_buff *skb, u32 mark) { - if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark))) + if (nla_put_be32(skb, CTA_MARK, htonl(mark))) goto nla_put_failure; return 0; @@ -532,7 +532,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) { if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_mark(skb, READ_ONCE(ct->mark)) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || @@ -711,6 +711,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) struct sk_buff *skb; unsigned int type; unsigned int flags = 0, group; + u32 mark; int err; if (events & (1 << IPCT_DESTROY)) { @@ -811,8 +812,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((events & (1 << IPCT_MARK) || ct->mark) - && ctnetlink_dump_mark(skb, ct) < 0) + mark = READ_ONCE(ct->mark); + if ((events & (1 << IPCT_MARK) || mark) && + ctnetlink_dump_mark(skb, mark) < 0) goto nla_put_failure; #endif nlmsg_end(skb, nlh); @@ -1099,7 +1101,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) } #ifdef 
CONFIG_NF_CONNTRACK_MARK - if ((ct->mark & filter->mark.mask) != filter->mark.val) + if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif @@ -1979,9 +1981,9 @@ static void ctnetlink_change_mark(struct nf_conn *ct, mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); mark = ntohl(nla_get_be32(cda[CTA_MARK])); - newmark = (ct->mark & mask) ^ mark; - if (newmark != ct->mark) - ct->mark = newmark; + newmark = (READ_ONCE(ct->mark) & mask) ^ mark; + if (newmark != READ_ONCE(ct->mark)) + WRITE_ONCE(ct->mark, newmark); } #endif @@ -2669,6 +2671,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; + u32 mark; zone = nf_ct_zone(ct); @@ -2726,7 +2729,8 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK - if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) + mark = READ_ONCE(ct->mark); + if (mark && ctnetlink_dump_mark(skb, mark) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 313d1c8ff066..a7f88cdf3f87 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -360,7 +360,7 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; #if defined(CONFIG_NF_CONNTRACK_MARK) - seq_printf(s, "mark=%u ", ct->mark); + seq_printf(s, "mark=%u ", READ_ONCE(ct->mark)); #endif ct_show_secctx(s, ct); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 781118465d46..14093d86e682 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -97,7 +97,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - *dest = ct->mark; + *dest = READ_ONCE(ct->mark); return; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK @@ -294,8 +294,8 @@ 
static void nft_ct_set_eval(const struct nft_expr *expr, switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - if (ct->mark != value) { - ct->mark = value; + if (READ_ONCE(ct->mark) != value) { + WRITE_ONCE(ct->mark, value); nf_conntrack_event_cache(IPCT_MARK, ct); } break; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index e5ebc0810675..ad3c033db64e 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -30,6 +30,7 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) u_int32_t new_targetmark; struct nf_conn *ct; u_int32_t newmark; + u_int32_t oldmark; ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) @@ -37,14 +38,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) switch (info->mode) { case XT_CONNMARK_SET: - newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; + oldmark = READ_ONCE(ct->mark); + newmark = (oldmark & ~info->ctmask) ^ info->ctmark; if (info->shift_dir == D_SHIFT_RIGHT) newmark >>= info->shift_bits; else newmark <<= info->shift_bits; - if (ct->mark != newmark) { - ct->mark = newmark; + if (READ_ONCE(ct->mark) != newmark) { + WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; @@ -55,15 +57,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) else new_targetmark <<= info->shift_bits; - newmark = (ct->mark & ~info->ctmask) ^ + newmark = (READ_ONCE(ct->mark) & ~info->ctmask) ^ new_targetmark; - if (ct->mark != newmark) { - ct->mark = newmark; + if (READ_ONCE(ct->mark) != newmark) { + WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: - new_targetmark = (ct->mark & info->ctmask); + new_targetmark = (READ_ONCE(ct->mark) & info->ctmask); if (info->shift_dir == D_SHIFT_RIGHT) new_targetmark >>= info->shift_bits; else @@ -126,7 +128,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == 
NULL) return false; - return ((ct->mark & info->mask) == info->mark) ^ info->invert; + return ((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert; } static int connmark_mt_check(const struct xt_mtchk_param *par) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 41f248895a87..0f0f380e81a4 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -150,7 +150,7 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) static u32 ovs_ct_get_mark(const struct nf_conn *ct) { #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - return ct ? ct->mark : 0; + return ct ? READ_ONCE(ct->mark) : 0; #else return 0; #endif @@ -336,9 +336,9 @@ static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) u32 new_mark; - new_mark = ct_mark | (ct->mark & ~(mask)); - if (ct->mark != new_mark) { - ct->mark = new_mark; + new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask)); + if (READ_ONCE(ct->mark) != new_mark) { + WRITE_ONCE(ct->mark, new_mark); if (nf_ct_is_confirmed(ct)) nf_conntrack_event_cache(IPCT_MARK, ct); key->ct.mark = new_mark; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index e19885d7fe2c..31d268eedf3f 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -62,7 +62,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, c = nf_ct_get(skb, &ctinfo); if (c) { - skb->mark = c->mark; + skb->mark = READ_ONCE(c->mark); /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; goto out; @@ -82,7 +82,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, c = nf_ct_tuplehash_to_ctrack(thash); /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; - skb->mark = c->mark; + skb->mark = READ_ONCE(c->mark); nf_ct_put(c); out: diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f7e88d7466c3..2d41d866de3e 100644 --- 
a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -177,7 +177,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, entry = tcf_ct_flow_table_flow_action_get_next(action); entry->id = FLOW_ACTION_CT_METADATA; #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - entry->ct_metadata.mark = ct->mark; + entry->ct_metadata.mark = READ_ONCE(ct->mark); #endif ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED : IP_CT_ESTABLISHED_REPLY; @@ -843,9 +843,9 @@ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) if (!mask) return; - new_mark = mark | (ct->mark & ~(mask)); - if (ct->mark != new_mark) { - ct->mark = new_mark; + new_mark = mark | (READ_ONCE(ct->mark) & ~(mask)); + if (READ_ONCE(ct->mark) != new_mark) { + WRITE_ONCE(ct->mark, new_mark); if (nf_ct_is_confirmed(ct)) nf_conntrack_event_cache(IPCT_MARK, ct); } diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index b20c8ce59905..06c74f22ab98 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -33,7 +33,7 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, { u8 dscp, newdscp; - newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) & + newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) & ~INET_ECN_MASK; switch (proto) { @@ -73,7 +73,7 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct sk_buff *skb) { ca->stats_cpmark_set++; - skb->mark = ct->mark & cp->cpmarkmask; + skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask; } static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, @@ -131,7 +131,7 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, } if (cp->mode & CTINFO_MODE_DSCP) - if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask)) + if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask)) tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto); if (cp->mode & CTINFO_MODE_CPMARK) From 
290a71ff721b072356c18be0e9bfa505935463d9 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Thu, 17 Nov 2022 06:20:11 +0000 Subject: [PATCH 0985/1477] ARM: mxs: fix memory leak in mxs_machine_init() [ Upstream commit f31e3c204d1844b8680a442a48868af5ac3d5481 ] If of_property_read_string() failed, 'soc_dev_attr' should be freed before return. Otherwise there is a memory leak. Fixes: 2046338dcbc6 ("ARM: mxs: Use soc bus infrastructure") Signed-off-by: Zheng Yongjun Reviewed-by: Marco Felsch Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/mach-mxs/mach-mxs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index c109f47e9cbc..a687e83ad604 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -387,8 +387,10 @@ static void __init mxs_machine_init(void) root = of_find_node_by_path("/"); ret = of_property_read_string(root, "model", &soc_dev_attr->machine); - if (ret) + if (ret) { + kfree(soc_dev_attr); return; + } soc_dev_attr->family = "Freescale MXS Family"; soc_dev_attr->soc_id = mxs_get_soc_id(); From 756534f7cf53f35e9e098c15a6e341f27b5888d9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 18 Nov 2022 10:41:02 -0300 Subject: [PATCH 0986/1477] ARM: dts: imx6q-prti6q: Fix ref/tcxo-clock-frequency properties [ Upstream commit e68be7b39f21d8a9291a5a3019787cd3ca999dd7 ] make dtbs_check gives the following errors: ref-clock-frequency: size (9) error for type uint32 tcxo-clock-frequency: size (9) error for type uint32 Fix it by passing the frequencies inside < > as documented in Documentation/devicetree/bindings/net/wireless/ti,wlcore.yaml. 
Signed-off-by: Fabio Estevam Fixes: 0d446a505592 ("ARM: dts: add Protonic PRTI6Q board") Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6q-prti6q.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts index b4605edfd2ab..d8fa83effd63 100644 --- a/arch/arm/boot/dts/imx6q-prti6q.dts +++ b/arch/arm/boot/dts/imx6q-prti6q.dts @@ -364,8 +364,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_wifi>; interrupts-extended = <&gpio1 30 IRQ_TYPE_LEVEL_HIGH>; - ref-clock-frequency = "38400000"; - tcxo-clock-frequency = "19200000"; + ref-clock-frequency = <38400000>; + tcxo-clock-frequency = <19200000>; }; }; From da86a63479e5743d8d631bd813a6f0b45423d4ef Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Thu, 17 Nov 2022 19:13:56 +0800 Subject: [PATCH 0987/1477] net: ethernet: mtk_eth_soc: fix error handling in mtk_open() [ Upstream commit f70074140524c59a0935947b06dd6cb6e1ea642d ] If mtk_start_dma() fails, invoke phylink_disconnect_phy() to perform cleanup. phylink_disconnect_phy() contains the put_device action. If phylink_disconnect_phy is not performed, the Kref of netdev will leak. 
Fixes: b8fc9f30821e ("net: ethernet: mediatek: Add basic PHYLINK support") Signed-off-by: Liu Jian Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20221117111356.161547-1-liujian56@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c7aff89141e1..217dc67c48fa 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2299,7 +2299,10 @@ static int mtk_open(struct net_device *dev) int err = mtk_start_dma(eth); if (err) + if (err) { + phylink_disconnect_phy(mac->phylink); return err; + } mtk_gdm_config(eth, MTK_GDMA_TO_PDMA); From a24d5f6c8b7bf0f1551573e45dfeb1958d852a91 Mon Sep 17 00:00:00 2001 From: Peter Kosyh Date: Thu, 17 Nov 2022 18:28:06 +0300 Subject: [PATCH 0988/1477] net/mlx4: Check retval of mlx4_bitmap_init [ Upstream commit 594c61ffc77de0a197934aa0f1df9285c68801c6 ] If mlx4_bitmap_init fails, mlx4_bitmap_alloc_range will dereference the NULL pointer (bitmap->table). Make sure that mlx4_bitmap_alloc_range is called only when mlx4_bitmap_init has returned no error. Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: d57febe1a478 ("net/mlx4: Add A0 hybrid steering") Reviewed-by: Tariq Toukan Signed-off-by: Peter Kosyh Link: https://lore.kernel.org/r/20221117152806.278072-1-pkosyh@yandex.ru Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 427e7a31862c..d7f2890c254f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -697,7 +697,8 @@ static int mlx4_create_zones(struct mlx4_dev *dev, err = mlx4_bitmap_init(*bitmap + k, 1, MLX4_QP_TABLE_RAW_ETH_SIZE - 1, 0, 0); - mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); + if (!err) + mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); } if (err) From 2c59ef9ab63df1ece9b53a3bd1d3779adcb006a4 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 17 Nov 2022 16:50:38 +0800 Subject: [PATCH 0989/1477] net/qla3xxx: fix potential memleak in ql3xxx_send() [ Upstream commit 62a7311fb96c61d281da9852dbee4712fc8c3277 ] The ql3xxx_send() returns NETDEV_TX_OK without freeing skb in error handling case, add dev_kfree_skb_any() to fix it. 
Fixes: bd36b0ac5d06 ("qla3xxx: Add support for Qlogic 4032 chip.") Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1668675039-21138-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qla3xxx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2219e4c59ae6..99fd35a8ca75 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -2475,6 +2475,7 @@ static netdev_tx_t ql3xxx_send(struct sk_buff *skb, skb_shinfo(skb)->nr_frags); if (tx_cb->seg_count == -1) { netdev_err(ndev, "%s: invalid segment count!\n", __func__); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } From 0b553ded34500fd9752ebee18dcabb18c062efb4 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 17 Nov 2022 21:51:48 +0800 Subject: [PATCH 0990/1477] net: pch_gbe: fix pci device refcount leak while module exiting [ Upstream commit 5619537284f1017e9f6c7500b02b859b3830a06d ] As comment of pci_get_domain_bus_and_slot() says, it returns a pci device with refcount increment, when finish using it, the caller must decrement the reference count by calling pci_dev_put(). In pch_gbe_probe(), pci_get_domain_bus_and_slot() is called, so in error path in probe() and remove() function, pci_dev_put() should be called to avoid refcount leak. Compile tested only. 
Fixes: 1a0bdadb4e36 ("net/pch_gbe: supports eg20t ptp clock") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221117135148.301014-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 3361166e56de..bde32f0845ca 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2482,6 +2482,7 @@ static void pch_gbe_remove(struct pci_dev *pdev) unregister_netdev(netdev); pch_gbe_phy_hw_reset(&adapter->hw); + pci_dev_put(adapter->ptp_pdev); free_netdev(netdev); } @@ -2563,7 +2564,7 @@ static int pch_gbe_probe(struct pci_dev *pdev, /* setup the private structure */ ret = pch_gbe_sw_init(adapter); if (ret) - goto err_free_netdev; + goto err_put_dev; /* Initialize PHY */ ret = pch_gbe_init_phy(adapter); @@ -2621,6 +2622,8 @@ static int pch_gbe_probe(struct pci_dev *pdev, err_free_adapter: pch_gbe_phy_hw_reset(&adapter->hw); +err_put_dev: + pci_dev_put(adapter->ptp_pdev); err_free_netdev: free_netdev(netdev); return ret; From ce41e03cacaa1f15539785a381ef0a27cf6d25da Mon Sep 17 00:00:00 2001 From: Diana Wang Date: Thu, 17 Nov 2022 16:37:43 +0100 Subject: [PATCH 0991/1477] nfp: fill splittable of devlink_port_attrs correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4abd9600b9d15d3d92a9ac25cf200422a4c415ee ] The error is reflected in that it shows wrong splittable status of port when executing "devlink port show". The reason which leads the error is that the assigned operation of splittable is just a simple negation operation of split and it does not consider port lanes quantity. A splittable port should have several lanes that can be split(lanes quantity > 1). 
If without the judgement, it will show wrong message for some firmware, such as 2x25G, 2x10G. Fixes: a0f49b548652 ("devlink: Add a new devlink port split ability attribute and pass to netlink") Signed-off-by: Diana Wang Reviewed-by: Louis Peens Reviewed-by: Niklas Söderlund Signed-off-by: Simon Horman Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/netronome/nfp/nfp_devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 7a8187458724..24578c48f075 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -363,7 +363,7 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) return ret; attrs.split = eth_port.is_split; - attrs.splittable = !attrs.split; + attrs.splittable = eth_port.port_lanes > 1 && !attrs.split; attrs.lanes = eth_port.port_lanes; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = eth_port.label_port; From 72be055615e0d8170ade2a0f750a7171b53fd37c Mon Sep 17 00:00:00 2001 From: Jaco Coetzee Date: Thu, 17 Nov 2022 16:37:44 +0100 Subject: [PATCH 0992/1477] nfp: add port from netdev validation for EEPROM access [ Upstream commit 0873016d46f6dfafd1bdf4d9b935b3331b226f7c ] Setting of the port flag `NFP_PORT_CHANGED`, introduced to ensure the correct reading of EEPROM data, causes a fatal kernel NULL pointer dereference in cases where the target netdev type cannot be determined. Add validation of port struct pointer before attempting to set the `NFP_PORT_CHANGED` flag. Return that operation is not supported if the netdev type cannot be determined. 
Fixes: 4ae97cae07e1 ("nfp: ethtool: fix the display error of `ethtool -m DEVNAME`") Signed-off-by: Jaco Coetzee Reviewed-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 3977aa2f59bd..311873ff57e3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1225,6 +1225,9 @@ nfp_port_get_module_info(struct net_device *netdev, u8 data; port = nfp_port_from_netdev(netdev); + if (!port) + return -EOPNOTSUPP; + /* update port state to get latest interface */ set_bit(NFP_PORT_CHANGED, &port->flags); eth_port = nfp_port_get_eth_port(port); From f42802e14a8752cebb434954c41f5aa123242996 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 18 Nov 2022 09:12:49 +0800 Subject: [PATCH 0993/1477] macsec: Fix invalid error code set [ Upstream commit 7cef6b73fba96abef731a53501924fc3c4a0f947 ] 'ret' is defined twice in macsec_changelink(), when it is set in macsec_is_offloaded case, it will be invalid before return. 
Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") Signed-off-by: YueHaibing Reviewed-by: Saeed Mahameed Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20221118011249.48112-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index c20ebf44acfe..3e564158c401 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3813,7 +3813,6 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; - int ret; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { From 909186cf34de7d761833d064cd58c64cf6884e00 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 19 Nov 2022 16:11:34 +0800 Subject: [PATCH 0994/1477] Drivers: hv: vmbus: fix double free in the error path of vmbus_add_channel_work() [ Upstream commit f92a4b50f0bd7fd52391dc4bb9a309085d278f91 ] In the error path of vmbus_device_register(), device_unregister() is called, which calls vmbus_device_release(). The latter frees the struct hv_device that was passed in to vmbus_device_register(). So remove the kfree() in vmbus_add_channel_work() to avoid a double free. Fixes: c2e5df616e1a ("vmbus: add per-channel sysfs info") Suggested-by: Michael Kelley Signed-off-by: Yang Yingliang Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20221119081135.1564691-2-yangyingliang@huawei.com Signed-off-by: Wei Liu Signed-off-by: Sasha Levin --- drivers/hv/channel_mgmt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 10188b1a6a08..5b902adb0d1b 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -501,13 +501,17 @@ static void vmbus_add_channel_work(struct work_struct *work) * Add the new device to the bus. 
This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. + * + * If vmbus_device_register() fails, the 'device_obj' is freed in + * vmbus_device_release() as called by device_unregister() in the + * error path of vmbus_device_register(). In the outside error + * path, there's no need to free it. */ ret = vmbus_device_register(newchannel->device_obj); if (ret != 0) { pr_err("unable to add child device object (relid %d)\n", newchannel->offermsg.child_relid); - kfree(newchannel->device_obj); goto err_deq_chan; } From 8dca384970acd94dd88aee60b1264e81e48d4ad1 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 19 Nov 2022 16:11:35 +0800 Subject: [PATCH 0995/1477] Drivers: hv: vmbus: fix possible memory leak in vmbus_device_register() [ Upstream commit 25c94b051592c010abe92c85b0485f1faedc83f3 ] If device_register() returns error in vmbus_device_register(), the name allocated by dev_set_name() must be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(). 
Fixes: 09d50ff8a233 ("Staging: hv: make the Hyper-V virtual bus code build") Signed-off-by: Yang Yingliang Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20221119081135.1564691-3-yangyingliang@huawei.com Signed-off-by: Wei Liu Signed-off-by: Sasha Levin --- drivers/hv/vmbus_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 514279dac7cb..e99400f3ae1d 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2020,6 +2020,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) ret = device_register(&child_device_obj->device); if (ret) { pr_err("Unable to register child device\n"); + put_device(&child_device_obj->device); return ret; } From e62e62ea912a49f7230620f1bdc20410b943a44c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 28 Jul 2021 17:01:15 +0200 Subject: [PATCH 0996/1477] netfilter: ipset: Limit the maximal range of consecutive elements to add/delete [ Upstream commit 5f7b51bf09baca8e4f80cbe879536842bafb5f31 ] The range size of consecutive elements were not limited. Thus one could define a huge range which may result soft lockup errors due to the long execution time. Now the range size is limited to 2^20 entries. 
Reported-by: Brad Spengler Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Stable-dep-of: c7aa1a76d4a0 ("netfilter: ipset: regression in ip_set_hash_ip.c") Signed-off-by: Sasha Levin --- include/linux/netfilter/ipset/ip_set.h | 3 +++ net/netfilter/ipset/ip_set_hash_ip.c | 9 ++++++++- net/netfilter/ipset/ip_set_hash_ipmark.c | 10 +++++++++- net/netfilter/ipset/ip_set_hash_ipport.c | 3 +++ net/netfilter/ipset/ip_set_hash_ipportip.c | 3 +++ net/netfilter/ipset/ip_set_hash_ipportnet.c | 3 +++ net/netfilter/ipset/ip_set_hash_net.c | 11 ++++++++++- net/netfilter/ipset/ip_set_hash_netiface.c | 10 +++++++++- net/netfilter/ipset/ip_set_hash_netnet.c | 16 +++++++++++++++- net/netfilter/ipset/ip_set_hash_netport.c | 11 ++++++++++- net/netfilter/ipset/ip_set_hash_netportnet.c | 16 +++++++++++++++- 11 files changed, 88 insertions(+), 7 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ab192720e2d6..53c9a17ecb3e 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -198,6 +198,9 @@ struct ip_set_region { u32 elements; /* Number of elements vs timeout */ }; +/* Max range where every element is added/deleted in one step */ +#define IPSET_MAX_RANGE (1<<20) + /* The core set type structure */ struct ip_set_type { struct list_head list; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 5d6d68eaf6a9..361f4fd69bf4 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -131,8 +131,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; - if (ip > ip_to) + if (ip > ip_to) { + if (ip_to == 0) + return -IPSET_ERR_HASH_ELEM; swap(ip, ip_to); + } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); @@ -143,6 +146,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], 
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); + /* 64bit division is not allowed on 32bit */ + if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) { ip = ntohl(h->next.ip); e.ip = htonl(ip); diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index aba1df617d6e..eefce34a34f0 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -120,6 +120,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK])); e.mark &= h->markmask; + if (e.mark == 0 && e.ip == 0) + return -IPSET_ERR_HASH_ELEM; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) { @@ -132,8 +134,11 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; - if (ip > ip_to) + if (ip > ip_to) { + if (e.mark == 0 && ip_to == 0) + return -IPSET_ERR_HASH_ELEM; swap(ip, ip_to); + } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); @@ -142,6 +147,9 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, cidr); } + if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 1ff228717e29..4a54e9e8ae59 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -172,6 +172,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 
fa88afd812fa..09737de5ecc3 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -179,6 +179,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index eef6ecfcb409..02685371a682 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -252,6 +252,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 136cf0781d3a..9d1beaacb973 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -139,7 +139,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0; + u32 ip = 0, ip_to = 0, ipn, n = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -187,6 +187,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); + n++; + } while (ipn++ < ip_to); + + if (n > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); do { diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index be5e95a0d876..c3ada9c63fa3 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ 
b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -201,7 +201,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0; + u32 ip = 0, ip_to = 0, ipn, n = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -255,6 +255,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); + n++; + } while (ipn++ < ip_to); + + if (n > IPSET_MAX_RANGE) + return -ERANGE; if (retried) ip = ntohl(h->next.ip); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index da4ef910b12d..b1411bc91a40 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -167,7 +167,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0; - u32 ip2 = 0, ip2_from = 0, ip2_to = 0; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn; + u64 n = 0, m = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -243,6 +244,19 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); + n++; + } while (ipn++ < ip_to); + ipn = ip2_from; + do { + ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); + m++; + } while (ipn++ < ip2_to); + + if (n*m > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 34448df80fb9..d26d13528fe8 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -157,7 +157,8 @@ hash_netport4_uadt(struct ip_set 
*set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 port, port_to, p = 0, ip = 0, ip_to = 0; + u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn; + u64 n = 0; bool with_ports = false; u8 cidr; int ret; @@ -234,6 +235,14 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr); + n++; + } while (ipn++ < ip_to); + + if (n*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 934c1712cba8..6446f4fccc72 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -181,7 +181,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2; + u32 ip2_from = 0, ip2_to = 0, ip2, ipn; + u64 n = 0, m = 0; bool with_ports = false; int ret; @@ -283,6 +284,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); + n++; + } while (ipn++ < ip_to); + ipn = ip2_from; + do { + ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); + m++; + } while (ipn++ < ip2_to); + + if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); From 5689eba90a20646ec3d7dd50360e28c7d5fdbbfe Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Wed, 28 Sep 2022 14:26:50 -0400 Subject: [PATCH 0997/1477] netfilter: ipset: regression in ip_set_hash_ip.c [ Upstream commit 
c7aa1a76d4a0a3c401025b60c401412bbb60f8c6 ] This patch introduced a regression: commit 48596a8ddc46 ("netfilter: ipset: Fix adding an IPv4 range containing more than 2^31 addresses") The variable e.ip is passed to adtfn() function which finally adds the ip address to the set. The patch above refactored the for loop and moved e.ip = htonl(ip) to the end of the for loop. What this means is that if the value of "ip" changes between the first assignment of e.ip and the for loop, then e.ip is pointing to a different ip address than "ip". Test case: $ ipset create jdtest_tmp hash:ip family inet hashsize 2048 maxelem 100000 $ ipset add jdtest_tmp 10.0.1.1/31 ipset v6.21.1: Element cannot be added to the set: it's already added The value of ip gets updated inside the "else if (tb[IPSET_ATTR_CIDR])" block but e.ip is still pointing to the old value. Fixes: 48596a8ddc46 ("netfilter: ipset: Fix adding an IPv4 range containing more than 2^31 addresses") Reviewed-by: Joshua Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_hash_ip.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 361f4fd69bf4..d7a81b2250e7 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -150,18 +150,16 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) return -ERANGE; - if (retried) { + if (retried) ip = ntohl(h->next.ip); - e.ip = htonl(ip); - } for (; ip <= ip_to;) { + e.ip = htonl(ip); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ip += hosts; - e.ip = htonl(ip); - if (e.ip == 0) + if (ip == 0) return 0; ret = 0; From e06ff9f8fedfc523d4f925ee1c0f06e26e1cefd6 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 20 Oct 2022 12:25:59 +0300 Subject: [PATCH
0998/1477] net/mlx5: Fix FW tracer timestamp calculation [ Upstream commit 61db3d7b99a367416e489ccf764cc5f9b00d62a1 ] Fix a bug in calculation of FW tracer timestamp. Decreasing one in the calculation should affect only bits 52_7 and not affect bits 6_0 of the timestamp, otherwise bits 6_0 are always set in this calculation. Fixes: 70dd6fdb8987 ("net/mlx5: FW tracer, parse traces and kernel tracing support") Signed-off-by: Moshe Shemesh Reviewed-by: Feras Daoud Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index e8a4adccd2b2..f800e1ca5ba6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -638,7 +638,7 @@ static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer, trace_timestamp = (timestamp_event.timestamp & MASK_52_7) | (str_frmt->timestamp & MASK_6_0); else - trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) | + trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) | (str_frmt->timestamp & MASK_6_0); mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp); From 891daa95b0bb3e739bcea577b6956567ad11005d Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 17 Nov 2022 09:07:20 +0200 Subject: [PATCH 0999/1477] net/mlx5: Fix handling of entry refcount when command is not issued to FW [ Upstream commit aaf2e65cac7f2e1ae729c2fbc849091df9699f96 ] In case command interface is down, or the command is not allowed, driver did not increment the entry refcount, but might have decremented it as part of forced completion handling. Fix that by always incrementing and decrementing the refcount to make it symmetric for all flows.
Fixes: 50b2412b7e78 ("net/mlx5: Avoid possible free of command entry while timeout comp handler") Signed-off-by: Eran Ben Elisha Signed-off-by: Moshe Shemesh Reported-by: Jack Wang Tested-by: Jack Wang Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index cf07318048df..c838d8698eab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -959,6 +959,7 @@ static void cmd_work_handler(struct work_struct *work) cmd_ent_get(ent); set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); + cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* Skip sending command to fw if internal error */ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { u8 status = 0; @@ -972,7 +973,6 @@ static void cmd_work_handler(struct work_struct *work) return; } - cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -1586,8 +1586,8 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force cmd_ent_put(ent); /* timeout work was canceled */ if (!forced || /* Real FW completion */ - pci_channel_offline(dev->pdev) || /* FW is inaccessible */ - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + mlx5_cmd_is_down(dev) || /* No real FW completion is expected */ + !opcode_allowed(cmd, ent->op)) cmd_ent_put(ent); ent->ts2 = ktime_get_ns(); From e87a077d09c05985a0edac7c6c49bb307f775d12 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 18 Nov 2022 16:45:00 -0500 Subject: [PATCH 1000/1477] tipc: set con sock in tipc_conn_alloc [ Upstream commit 0e5d56c64afcd6fd2d132ea972605b66f8a7d3c4 ] A crash was reported by Wei Chen: BUG: kernel NULL 
pointer dereference, address: 0000000000000018 RIP: 0010:tipc_conn_close+0x12/0x100 Call Trace: tipc_topsrv_exit_net+0x139/0x320 ops_exit_list.isra.9+0x49/0x80 cleanup_net+0x31a/0x540 process_one_work+0x3fa/0x9f0 worker_thread+0x42/0x5c0 It was caused by !con->sock in tipc_conn_close(). In tipc_topsrv_accept(), con is allocated in conn_idr then its sock is set: con = tipc_conn_alloc(); ... <----[1] con->sock = newsock; If tipc_conn_close() is called at any time during [1], the null-pointer-deref is triggered by con->sock->sk because con->sock is not yet set. This patch fixes it by moving the con->sock setting to tipc_conn_alloc() under s->idr_lock. So that con->sock can never be NULL when getting the con from s->conn_idr. It will be also safer to move con->server and flag CF_CONNECTED setting under s->idr_lock, as they should all be set before tipc_conn_alloc() is called. Fixes: c5fa7b3cf3cb ("tipc: introduce new TIPC server infrastructure") Reported-by: Wei Chen Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tipc/topsrv.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 561e709ae06a..dcd4afb3640f 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -176,7 +176,7 @@ static void tipc_conn_close(struct tipc_conn *con) conn_put(con); } -static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) +static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock) { struct tipc_conn *con; int ret; @@ -202,10 +202,11 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) } con->conid = ret; s->idr_in_use++; - spin_unlock_bh(&s->idr_lock); set_bit(CF_CONNECTED, &con->flags); con->server = s; + con->sock = sock; + spin_unlock_bh(&s->idr_lock); return con; } @@ -467,7 +468,7 @@ static void tipc_topsrv_accept(struct work_struct *work) ret = kernel_accept(lsock, &newsock, O_NONBLOCK); if (ret < 0) return; - con = tipc_conn_alloc(srv); + con =
tipc_conn_alloc(srv, newsock); if (IS_ERR(con)) { ret = PTR_ERR(con); sock_release(newsock); @@ -479,7 +480,6 @@ static void tipc_topsrv_accept(struct work_struct *work) newsk->sk_data_ready = tipc_conn_data_ready; newsk->sk_write_space = tipc_conn_write_space; newsk->sk_user_data = con; - con->sock = newsock; write_unlock_bh(&newsk->sk_callback_lock); /* Wake up receive process in case of 'SYN+' message */ @@ -577,12 +577,11 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, sub.filter = filter; *(u64 *)&sub.usr_handle = (u64)port; - con = tipc_conn_alloc(tipc_topsrv(net)); + con = tipc_conn_alloc(tipc_topsrv(net), NULL); if (IS_ERR(con)) return false; *conid = con->conid; - con->sock = NULL; rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); if (rc >= 0) return true; From 4058e3b74ab3eabe0835cee9a0c6deda79e8a295 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 18 Nov 2022 16:45:01 -0500 Subject: [PATCH 1001/1477] tipc: add an extra conn_get in tipc_conn_alloc [ Upstream commit a7b42969d63f47320853a802efd879fbdc4e010e ] One extra conn_get() is needed in tipc_conn_alloc(), as after tipc_conn_alloc() is called, tipc_conn_close() may free this con before dereferencing it in tipc_topsrv_accept(): tipc_conn_alloc(); newsk = newsock->sk; <---- tipc_conn_close(); write_lock_bh(&sk->sk_callback_lock); newsk->sk_data_ready = tipc_conn_data_ready; Then an uaf issue can be triggered: BUG: KASAN: use-after-free in tipc_topsrv_accept+0x1e7/0x370 [tipc] Call Trace: dump_stack_lvl+0x33/0x46 print_report+0x178/0x4b0 kasan_report+0x8c/0x100 kasan_check_range+0x179/0x1e0 tipc_topsrv_accept+0x1e7/0x370 [tipc] process_one_work+0x6a3/0x1030 worker_thread+0x8a/0xdf0 This patch fixes it by holding it in tipc_conn_alloc(), then after all accessing in tipc_topsrv_accept() releasing it. Note that when doing this in tipc_topsrv_kern_subscr(), as tipc_conn_rcv_sub() returns 0 or -1 only, we don't need to check for "> 0".
Fixes: c5fa7b3cf3cb ("tipc: introduce new TIPC server infrastructure") Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tipc/topsrv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index dcd4afb3640f..89d8a2bd30cd 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -206,6 +206,7 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *s set_bit(CF_CONNECTED, &con->flags); con->server = s; con->sock = sock; + conn_get(con); spin_unlock_bh(&s->idr_lock); return con; @@ -484,6 +485,7 @@ static void tipc_topsrv_accept(struct work_struct *work) /* Wake up receive process in case of 'SYN+' message */ newsk->sk_data_ready(newsk); + conn_put(con); } } @@ -583,10 +585,11 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, *conid = con->conid; rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); - if (rc >= 0) - return true; + if (rc) + conn_put(con); + conn_put(con); - return false; + return !rc; } void tipc_topsrv_kern_unsubscr(struct net *net, int conid) From c7788361a645708a51924d8aa63ead223923a5a8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 19 Nov 2022 15:28:32 +0800 Subject: [PATCH 1002/1477] tipc: check skb_linearize() return value in tipc_disc_rcv() [ Upstream commit cd0f6421162201e4b22ce757a1966729323185eb ] If skb_linearize() fails in tipc_disc_rcv(), we need to free the skb instead of handle it. 
Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values") Signed-off-by: YueHaibing Acked-by: Jon Maloy Link: https://lore.kernel.org/r/20221119072832.7896-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tipc/discover.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 2ae268b67465..2730310249e3 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -210,7 +210,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, u32 self; int err; - skb_linearize(skb); + if (skb_linearize(skb)) { + kfree_skb(skb); + return; + } hdr = buf_msg(skb); if (caps & TIPC_NODE_ID128) From 6b638a16ead12e3759d80322ab8cabebfd4d7165 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Thu, 3 Nov 2022 17:07:13 +0800 Subject: [PATCH 1003/1477] xfrm: Fix ignored return value in xfrm6_init() [ Upstream commit 40781bfb836eda57d19c0baa37c7e72590e05fdc ] When IPv6 module initializing in xfrm6_init(), register_pernet_subsys() is possible to fail but its return value is ignored. If IPv6 initialization fails later and xfrm6_fini() is called, removing uninitialized list in xfrm6_net_ops will cause null-ptr-deref: KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] CPU: 1 PID: 330 Comm: insmod RIP: 0010:unregister_pernet_operations+0xc9/0x450 Call Trace: unregister_pernet_subsys+0x31/0x3e xfrm6_fini+0x16/0x30 [ipv6] ip6_route_init+0xcd/0x128 [ipv6] inet6_init+0x29c/0x602 [ipv6] ... Fix it by catching the error return value of register_pernet_subsys(). 
Fixes: 8d068875caca ("xfrm: make gc_thresh configurable in all namespaces") Signed-off-by: Chen Zhongjin Reviewed-by: Leon Romanovsky Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv6/xfrm6_policy.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index af7a4b8b1e9c..247296e3294b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -289,9 +289,13 @@ int __init xfrm6_init(void) if (ret) goto out_state; - register_pernet_subsys(&xfrm6_net_ops); + ret = register_pernet_subsys(&xfrm6_net_ops); + if (ret) + goto out_protocol; out: return ret; +out_protocol: + xfrm6_protocol_fini(); out_state: xfrm6_state_fini(); out_policy: From 8e2664e12bc6fa8d2aa642a7027d6a5c2810a674 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 17 Nov 2022 15:50:09 +0800 Subject: [PATCH 1004/1477] sfc: fix potential memleak in __ef100_hard_start_xmit() [ Upstream commit aad98abd5cb8133507f22654f56bcb443aaa2d89 ] The __ef100_hard_start_xmit() returns NETDEV_TX_OK without freeing skb in error handling case, add dev_kfree_skb_any() to fix it. 
Fixes: 51b35a454efd ("sfc: skeleton EF100 PF driver") Signed-off-by: Zhang Changzhong Acked-by: Martin Habets Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/1668671409-10909-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/ef100_netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index 67fe44db6b61..63a44ee763be 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -200,6 +200,7 @@ static netdev_tx_t ef100_hard_start_xmit(struct sk_buff *skb, skb->len, skb->data_len, channel->channel); if (!efx->n_channels || !efx->n_tx_channels || !channel) { netif_stop_queue(net_dev); + dev_kfree_skb_any(skb); goto err; } From 68a7aec3f4b55caae63a77881dff2c07a2a8f883 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 18 Nov 2022 16:33:03 -0500 Subject: [PATCH 1005/1477] net: sched: allow act_ct to be built without NF_NAT [ Upstream commit 8427fd100c7b7793650e212a81e42f1cf124613d ] Commit f11fe1dae1c4 ("net/sched: Make NET_ACT_CT depends on NF_NAT") fixed the build failure when NF_NAT is m and NET_ACT_CT is y by adding a dependency on NF_NAT for NET_ACT_CT. However, it also means that NET_ACT_CT cannot be built without NF_NAT, which is not expected. This patch fixes it by using "(!NF_NAT || NF_NAT)" as the dependency instead.
Fixes: f11fe1dae1c4 ("net/sched: Make NET_ACT_CT depends on NF_NAT") Signed-off-by: Xin Long Link: https://lore.kernel.org/r/b6386f28d1ba34721795fb776a91cbdabb203447.1668807183.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/sched/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index d762e89ab74f..bc4e5da76fa6 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -976,7 +976,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE + depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE help Say Y here to allow sending the packets to conntrack module. From 32b944b9c4b25227ec8d983c27532d4f4a65d8a4 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 18 Nov 2022 16:24:19 +0800 Subject: [PATCH 1006/1477] NFC: nci: fix memory leak in nci_rx_data_packet() [ Upstream commit 53270fb0fd77fe786d8c07a0793981d797836b93 ] Syzbot reported a memory leak about skb: unreferenced object 0xffff88810e144e00 (size 240): comm "syz-executor284", pid 3701, jiffies 4294952403 (age 12.620s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
backtrace: [] __alloc_skb+0x1f9/0x270 net/core/skbuff.c:497 [] alloc_skb include/linux/skbuff.h:1267 [inline] [] virtual_ncidev_write+0x24/0xe0 drivers/nfc/virtual_ncidev.c:116 [] do_loop_readv_writev fs/read_write.c:759 [inline] [] do_loop_readv_writev fs/read_write.c:743 [inline] [] do_iter_write+0x253/0x300 fs/read_write.c:863 [] vfs_writev+0xdd/0x240 fs/read_write.c:934 [] do_writev+0xa6/0x1c0 fs/read_write.c:977 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd In nci_rx_data_packet(), if we don't get a valid conn_info, we will return directly but forget to release the skb. Reported-by: syzbot+cdb9a427d1bc08815104@syzkaller.appspotmail.com Fixes: 4aeee6871e8c ("NFC: nci: Add dynamic logical connections support") Signed-off-by: Liu Shixin Link: https://lore.kernel.org/r/20221118082419.239475-1-liushixin2@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/nfc/nci/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index b002e18f38c8..b4548d887489 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -279,8 +279,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_plen(skb->data)); conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data)); - if (!conn_info) + if (!conn_info) { + kfree_skb(skb); return; + } /* strip the nci data header */ skb_pull(skb, NCI_DATA_HDR_SIZE); From f81e9c0510b02e5f096c16c2f419248f48f647f0 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sun, 20 Nov 2022 23:12:07 +0100 Subject: [PATCH 1007/1477] regulator: twl6030: re-add TWL6032_SUBCLASS [ Upstream commit 3d6c982b26db94cc21bc9f7784f63e8286b7be62 ] In former times, info->feature was populated via the parent driver by pdata/regulator_init_data->driver_data for all regulators when USB_PRODUCT_ID_LSB indicates a TWL6032. 
Today, the information is not set, so re-add it at the regulator definitions. Fixes: 25d82337705e2 ("regulator: twl: make driver DT only") Signed-off-by: Andreas Kemnade Link: https://lore.kernel.org/r/20221120221208.3093727-2-andreas@kemnade.info Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/twl6030-regulator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c index 430265c404d6..7c7e3648ea4b 100644 --- a/drivers/regulator/twl6030-regulator.c +++ b/drivers/regulator/twl6030-regulator.c @@ -530,6 +530,7 @@ static const struct twlreg_info TWL6030_INFO_##label = { \ #define TWL6032_ADJUSTABLE_LDO(label, offset) \ static const struct twlreg_info TWL6032_INFO_##label = { \ .base = offset, \ + .features = TWL6032_SUBCLASS, \ .desc = { \ .name = #label, \ .id = TWL6032_REG_##label, \ @@ -562,6 +563,7 @@ static const struct twlreg_info TWLFIXED_INFO_##label = { \ #define TWL6032_ADJUSTABLE_SMPS(label, offset) \ static const struct twlreg_info TWLSMPS_INFO_##label = { \ .base = offset, \ + .features = TWL6032_SUBCLASS, \ .desc = { \ .name = #label, \ .id = TWL6032_REG_##label, \ From c40b76dfa7e4b27e861ac7722d1a9f974997d0f0 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 19 Nov 2022 15:02:02 +0800 Subject: [PATCH 1008/1477] bnx2x: fix pci device refcount leak in bnx2x_vf_is_pcie_pending() [ Upstream commit 3637a29ccbb6461b7268c5c5db525935d510afc6 ] As comment of pci_get_domain_bus_and_slot() says, it returns a pci device with refcount increment, when finish using it, the caller must decrement the reference count by calling pci_dev_put(). Call pci_dev_put() before returning from bnx2x_vf_is_pcie_pending() to avoid refcount leak. 
Fixes: b56e9670ffa4 ("bnx2x: Prepare device and initialize VF database") Suggested-by: Jakub Kicinski Signed-off-by: Yang Yingliang Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221119070202.1407648-1-yangyingliang@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 08437eaacbb9..ac327839eed9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -795,16 +795,20 @@ static void bnx2x_vf_enable_traffic(struct bnx2x *bp, struct bnx2x_virtf *vf) static u8 bnx2x_vf_is_pcie_pending(struct bnx2x *bp, u8 abs_vfid) { - struct pci_dev *dev; struct bnx2x_virtf *vf = bnx2x_vf_by_abs_fid(bp, abs_vfid); + struct pci_dev *dev; + bool pending; if (!vf) return false; dev = pci_get_domain_bus_and_slot(vf->domain, vf->bus, vf->devfn); - if (dev) - return bnx2x_is_pcie_pending(dev); - return false; + if (!dev) + return false; + pending = bnx2x_is_pcie_pending(dev); + pci_dev_put(dev); + + return pending; } int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid) From af9de5cdcb1088b1f9b61814bfa8752f39c13b97 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Sat, 5 Nov 2022 00:05:36 +0800 Subject: [PATCH 1009/1477] dma-buf: fix racing conflict of dma_heap_add() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 432e25902b9651622578c6248e549297d03caf66 ] Racing conflict could be: task A task B list_for_each_entry strcmp(h->name)) list_for_each_entry strcmp(h->name) kzalloc kzalloc ...... ..... device_create device_create list_add list_add The root cause is that task B has no idea about the fact someone else(A) has inserted heap with same name when it calls list_add, so a potential collision occurs. 
Fixes: c02a81fba74f ("dma-buf: Add dma-buf heaps framework") Signed-off-by: Dawei Li Acked-by: Andrew Davis Acked-by: Christian König Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/TYCP286MB2323873BBDF88020781FB986CA3B9@TYCP286MB2323.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Sasha Levin --- drivers/dma-buf/dma-heap.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index 798f86fcd50f..dcbb023acc45 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -209,18 +209,6 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) return ERR_PTR(-EINVAL); } - /* check the name is unique */ - mutex_lock(&heap_list_lock); - list_for_each_entry(h, &heap_list, list) { - if (!strcmp(h->name, exp_info->name)) { - mutex_unlock(&heap_list_lock); - pr_err("dma_heap: Already registered heap named %s\n", - exp_info->name); - return ERR_PTR(-EINVAL); - } - } - mutex_unlock(&heap_list_lock); - heap = kzalloc(sizeof(*heap), GFP_KERNEL); if (!heap) return ERR_PTR(-ENOMEM); @@ -259,13 +247,27 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) err_ret = ERR_CAST(dev_ret); goto err2; } - /* Add heap to the list */ + mutex_lock(&heap_list_lock); + /* check the name is unique */ + list_for_each_entry(h, &heap_list, list) { + if (!strcmp(h->name, exp_info->name)) { + mutex_unlock(&heap_list_lock); + pr_err("dma_heap: Already registered heap named %s\n", + exp_info->name); + err_ret = ERR_PTR(-EINVAL); + goto err3; + } + } + + /* Add heap to the list */ list_add(&heap->list, &heap_list); mutex_unlock(&heap_list_lock); return heap; +err3: + device_destroy(dma_heap_class, heap->heap_devt); err2: cdev_del(&heap->heap_cdev); err1: From b8e494240e69f91517256adcd6fda62d0671772d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 21 Nov 2022 19:26:15 +0100 Subject: [PATCH 1010/1477] netfilter: 
flowtable_offload: add missing locking [ Upstream commit bcd9e3c1656d0f7dd9743598c65c3ae24efb38d0 ] nf_flow_table_block_setup and the driver TC_SETUP_FT call can modify the flow block cb list while they are being traversed elsewhere, causing a crash. Add a write lock around the calls to protect readers Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Reported-by: Chad Monroe Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_offload.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index d1862782be45..28306cb66719 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -910,6 +910,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, struct flow_block_cb *block_cb, *next; int err = 0; + down_write(&flowtable->flow_block_lock); switch (cmd) { case FLOW_BLOCK_BIND: list_splice(&bo->cb_list, &flowtable->flow_block.cb_list); @@ -924,6 +925,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, WARN_ON_ONCE(1); err = -EOPNOTSUPP; } + up_write(&flowtable->flow_block_lock); return err; } @@ -980,7 +982,9 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, extack); + down_write(&flowtable->flow_block_lock); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); + up_write(&flowtable->flow_block_lock); if (err < 0) return err; From c0af4d005a260a5f93b5784d6dc26e2cd2ef18dd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 18 Nov 2022 17:49:11 -0800 Subject: [PATCH 1011/1477] dccp/tcp: Reset saddr on failure after inet6?_hash_connect(). [ Upstream commit 77934dc6db0d2b111a8f2759e9ad2fb67f5cffa5 ] When connect() is called on a socket bound to the wildcard address, we change the socket's saddr to a local address. 
If the socket fails to connect() to the destination, we have to reset the saddr. However, when an error occurs after inet6?_hash_connect() in (dccp|tcp)_v[46]_connect(), we forget to reset saddr and leave the socket bound to the address. From the user's point of view, whether saddr is reset or not varies with errno. Let's fix this inconsistent behaviour. Note that after this patch, the repro [0] will trigger the WARN_ON() in inet_csk_get_port() again, but this patch is not buggy and rather fixes a bug papering over the bhash2's bug for which we need another fix. For the record, the repro causes -EADDRNOTAVAIL in inet_hash6_connect() by this sequence: s1 = socket() s1.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1) s1.bind(('127.0.0.1', 10000)) s1.sendto(b'hello', MSG_FASTOPEN, (('127.0.0.1', 10000))) # or s1.connect(('127.0.0.1', 10000)) s2 = socket() s2.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1) s2.bind(('0.0.0.0', 10000)) s2.connect(('127.0.0.1', 10000)) # -EADDRNOTAVAIL s2.listen(32) # WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2); [0]: https://syzkaller.appspot.com/bug?extid=015d756bbd1f8b5c8f09 Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6") Fixes: 7c657876b63c ("[DCCP]: Initial implementation") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Acked-by: Joanne Koong Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/dccp/ipv4.c | 2 ++ net/dccp/ipv6.c | 2 ++ net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/tcp_ipv6.c | 2 ++ 4 files changed, 8 insertions(+) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2455b0c0e486..a2a8b952b3c5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -130,6 +130,8 @@ failure: * This unhashes the socket and releases the local port, if necessary.
*/ dccp_set_state(sk, DCCP_CLOSED); + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 2be5c69824f9..21c61a9c3b15 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -957,6 +957,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: dccp_set_state(sk, DCCP_CLOSED); + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); __sk_dst_reset(sk); failure: inet->inet_dport = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 31a8009f74ee..8bd7b1ec3b6a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -322,6 +322,8 @@ failure: * if necessary. */ tcp_set_state(sk, TCP_CLOSE); + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a558dd9d177b..c599e14be414 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -339,6 +339,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: tcp_set_state(sk, TCP_CLOSE); + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; From aeebb074997251c150d042a33a8673bfea53c247 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sun, 20 Nov 2022 15:28:38 +0800 Subject: [PATCH 1012/1477] ipv4: Fix error return code in fib_table_insert() [ Upstream commit 568fe84940ac0e4e0b2cd7751b8b4911f7b9c215 ] In fib_table_insert(), if the alias was already inserted, but node not exist, the error code should be set before return from error handling path. 
Fixes: a6c76c17df02 ("ipv4: Notify route after insertion to the routing table") Signed-off-by: Ziyang Xuan Link: https://lore.kernel.org/r/20221120072838.2167047-1-william.xuanziyang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/fib_trie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a28f525e2c47..d11fb16234a6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1331,8 +1331,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, /* The alias was already inserted, so the node must exist. */ l = l ? l : fib_find_node(t, &tp, key); - if (WARN_ON_ONCE(!l)) + if (WARN_ON_ONCE(!l)) { + err = -ENOENT; goto out_free_new_fa; + } if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) == new_fa) { From e61b00374a6e45c2a13b00d06e09d0000b3386ca Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 23 Nov 2022 17:07:18 +0100 Subject: [PATCH 1013/1477] s390/dasd: fix no record found for raw_track_access [ Upstream commit 590ce6d96d6a224b470a3862c33a483d5022bfdb ] For DASD devices in raw_track_access mode only full track images are read and written. For this purpose it is not necessary to do search operation in the locate record extended function. The documentation even states that this might fail if the searched record is not found on a track. Currently the driver sets a value of 1 in the search field for the first record after record zero. This is the default for disks not in raw_track_access mode but record 1 might be missing on a completely empty track. There has not been any problem with this on IBM storage servers but it might lead to errors with DASD devices on other vendors storage servers. Fix this by setting the search field to 0. Record zero is always available even on a completely empty track. 
Fixes: e4dbb0f2b5dd ("[S390] dasd: Add support for raw ECKD access.") Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20221123160719.3002694-4-sth@linux.ibm.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/s390/block/dasd_eckd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 7749deb614d7..53d22975a32f 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -4627,7 +4627,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, struct dasd_device *basedev; struct req_iterator iter; struct dasd_ccw_req *cqr; - unsigned int first_offs; unsigned int trkcount; unsigned long *idaws; unsigned int size; @@ -4661,7 +4660,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, last_trk = (blk_rq_pos(req) + blk_rq_sectors(req) - 1) / DASD_RAW_SECTORS_PER_TRACK; trkcount = last_trk - first_trk + 1; - first_offs = 0; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK; @@ -4705,13 +4703,13 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, if (use_prefix) { prefix_LRE(ccw++, data, first_trk, last_trk, cmd, basedev, - startdev, 1, first_offs + 1, trkcount, 0, 0); + startdev, 1, 0, trkcount, 0, 0); } else { define_extent(ccw++, data, first_trk, last_trk, cmd, basedev, 0); ccw[-1].flags |= CCW_FLAG_CC; data += sizeof(struct DE_eckd_data); - locate_record_ext(ccw++, data, first_trk, first_offs + 1, + locate_record_ext(ccw++, data, first_trk, 0, trkcount, cmd, basedev, 0, 0); } From 4d2be0cf27d9b7b508bf9d7b524cfb79e31a6311 Mon Sep 17 00:00:00 2001 From: "Ahmed S. 
Darwish" Date: Thu, 28 Jan 2021 20:48:02 +0100 Subject: [PATCH 1014/1477] net: arcnet: Fix RESET flag handling [ Upstream commit 01365633bd1c836240f9bbf86bbeee749795480a ] The main arcnet interrupt handler calls arcnet_close() then arcnet_open(), if the RESET status flag is encountered. This is invalid: 1) In general, interrupt handlers should never call ->ndo_stop() and ->ndo_open() functions. They are usually full of blocking calls and other methods that are expected to be called only from drivers init and exit code paths. 2) arcnet_close() contains a del_timer_sync(). If the irq handler interrupts the to-be-deleted timer, del_timer_sync() will just loop forever. 3) arcnet_close() also calls tasklet_kill(), which has a warning if called from irq context. 4) For device reset, the sequence "arcnet_close(); arcnet_open();" is not complete. Some children arcnet drivers have special init/exit code sequences, which then embed a call to arcnet_open() and arcnet_close() accordingly. Check drivers/net/arcnet/com20020.c. Run the device RESET sequence from a scheduled workqueue instead. Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20210128194802.727770-1-a.darwish@linutronix.de Signed-off-by: Jakub Kicinski Stable-dep-of: 1c40cde6b517 ("arcnet: fix potential memory leak in com20020_probe()") Signed-off-by: Sasha Levin --- drivers/net/arcnet/arc-rimi.c | 4 +- drivers/net/arcnet/arcdevice.h | 6 +++ drivers/net/arcnet/arcnet.c | 66 +++++++++++++++++++++++++++++-- drivers/net/arcnet/com20020-isa.c | 4 +- drivers/net/arcnet/com20020-pci.c | 2 +- drivers/net/arcnet/com20020_cs.c | 2 +- drivers/net/arcnet/com90io.c | 4 +- drivers/net/arcnet/com90xx.c | 4 +- 8 files changed, 78 insertions(+), 14 deletions(-) diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index 98df38fe553c..12d085405bd0 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -332,7 +332,7 @@ static int __init arc_rimi_init(void) dev->irq = 9; if (arcrimi_probe(dev)) { - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -349,7 +349,7 @@ static void __exit arc_rimi_exit(void) iounmap(lp->mem_start); release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); free_irq(dev->irq, dev); - free_netdev(dev); + free_arcdev(dev); } #ifndef MODULE diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index 22a49c6d7ae6..5d4a4c7efbbf 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -298,6 +298,10 @@ struct arcnet_local { int excnak_pending; /* We just got an excesive nak interrupt */ + /* RESET flag handling */ + int reset_in_progress; + struct work_struct reset_work; + struct { uint16_t sequence; /* sequence number (incs with each packet) */ __be16 aborted_seq; @@ -350,7 +354,9 @@ void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) void arcnet_unregister_proto(struct ArcProto *proto); irqreturn_t arcnet_interrupt(int irq, void *dev_id); + struct net_device *alloc_arcdev(const char *name); +void 
free_arcdev(struct net_device *dev); int arcnet_open(struct net_device *dev); int arcnet_close(struct net_device *dev); diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index e04efc0a5c97..d76dd7d14299 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -387,10 +387,44 @@ static void arcnet_timer(struct timer_list *t) struct arcnet_local *lp = from_timer(lp, t, timer); struct net_device *dev = lp->dev; - if (!netif_carrier_ok(dev)) { + spin_lock_irq(&lp->lock); + + if (!lp->reset_in_progress && !netif_carrier_ok(dev)) { netif_carrier_on(dev); netdev_info(dev, "link up\n"); } + + spin_unlock_irq(&lp->lock); +} + +static void reset_device_work(struct work_struct *work) +{ + struct arcnet_local *lp; + struct net_device *dev; + + lp = container_of(work, struct arcnet_local, reset_work); + dev = lp->dev; + + /* Do not bring the network interface back up if an ifdown + * was already done. + */ + if (!netif_running(dev) || !lp->reset_in_progress) + return; + + rtnl_lock(); + + /* Do another check, in case of an ifdown that was triggered in + * the small race window between the exit condition above and + * acquiring RTNL. + */ + if (!netif_running(dev) || !lp->reset_in_progress) + goto out; + + dev_close(dev); + dev_open(dev, NULL); + +out: + rtnl_unlock(); } static void arcnet_reply_tasklet(unsigned long data) @@ -452,12 +486,25 @@ struct net_device *alloc_arcdev(const char *name) lp->dev = dev; spin_lock_init(&lp->lock); timer_setup(&lp->timer, arcnet_timer, 0); + INIT_WORK(&lp->reset_work, reset_device_work); } return dev; } EXPORT_SYMBOL(alloc_arcdev); +void free_arcdev(struct net_device *dev) +{ + struct arcnet_local *lp = netdev_priv(dev); + + /* Do not cancel this at ->ndo_close(), as the workqueue itself + * indirectly calls the ifdown path through dev_close(). + */ + cancel_work_sync(&lp->reset_work); + free_netdev(dev); +} +EXPORT_SYMBOL(free_arcdev); + /* Open/initialize the board. 
This is called sometime after booting when * the 'ifconfig' program is run. * @@ -587,6 +634,10 @@ int arcnet_close(struct net_device *dev) /* shut down the card */ lp->hw.close(dev); + + /* reset counters */ + lp->reset_in_progress = 0; + module_put(lp->hw.owner); return 0; } @@ -820,6 +871,9 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) spin_lock_irqsave(&lp->lock, flags); + if (lp->reset_in_progress) + goto out; + /* RESET flag was enabled - if device is not running, we must * clear it right away (but nothing else). */ @@ -852,11 +906,14 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if (status & RESETflag) { arc_printk(D_NORMAL, dev, "spurious reset (status=%Xh)\n", status); - arcnet_close(dev); - arcnet_open(dev); + + lp->reset_in_progress = 1; + netif_stop_queue(dev); + netif_carrier_off(dev); + schedule_work(&lp->reset_work); /* get out of the interrupt handler! */ - break; + goto out; } /* RX is inhibited - we must have received something. * Prepare to receive into the next buffer. 
@@ -1052,6 +1109,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) udelay(1); lp->hw.intmask(dev, lp->intmask); +out: spin_unlock_irqrestore(&lp->lock, flags); return retval; } diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index f983c4ce6b07..be618e4b9ed5 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -169,7 +169,7 @@ static int __init com20020_init(void) dev->irq = 9; if (com20020isa_probe(dev)) { - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -182,7 +182,7 @@ static void __exit com20020_exit(void) unregister_netdev(my_dev); free_irq(my_dev->irq, my_dev); release_region(my_dev->base_addr, ARCNET_TOTAL_SIZE); - free_netdev(my_dev); + free_arcdev(my_dev); } #ifndef MODULE diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 9f44e2e458df..b4f8798d8c50 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -294,7 +294,7 @@ static void com20020pci_remove(struct pci_dev *pdev) unregister_netdev(dev); free_irq(dev->irq, dev); - free_netdev(dev); + free_arcdev(dev); } } diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index cf607ffcf358..9cc5eb6a8e90 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -177,7 +177,7 @@ static void com20020_detach(struct pcmcia_device *link) dev = info->dev; if (dev) { dev_dbg(&link->dev, "kfree...\n"); - free_netdev(dev); + free_arcdev(dev); } dev_dbg(&link->dev, "kfree2...\n"); kfree(info); diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index cf214b730671..3856b447d38e 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -396,7 +396,7 @@ static int __init com90io_init(void) err = com90io_probe(dev); if (err) { - free_netdev(dev); + free_arcdev(dev); return err; } @@ -419,7 +419,7 @@ static void __exit com90io_exit(void) free_irq(dev->irq, dev); 
release_region(dev->base_addr, ARCNET_TOTAL_SIZE); - free_netdev(dev); + free_arcdev(dev); } module_init(com90io_init) diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index 3dc3d533cb19..d8dfb9ea0de8 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -554,7 +554,7 @@ err_free_irq: err_release_mem: release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); err_free_dev: - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -672,7 +672,7 @@ static void __exit com90xx_exit(void) release_region(dev->base_addr, ARCNET_TOTAL_SIZE); release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); - free_netdev(dev); + free_arcdev(dev); } } From 9cc863d523999de19f609bf8da49d6dad2bba193 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Sun, 20 Nov 2022 14:24:38 +0800 Subject: [PATCH 1015/1477] arcnet: fix potential memory leak in com20020_probe() [ Upstream commit 1c40cde6b5171d9c8dfc69be00464fd1c75e210b ] In com20020_probe(), if com20020_config() fails, dev and info will not be freed, which will lead to a memory leak. This patch adds freeing dev and info after com20020_config() fails to fix this bug. Compile tested only. Fixes: 15b99ac17295 ("[PATCH] pcmcia: add return value to _config() functions") Signed-off-by: Wang Hai Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/arcnet/com20020_cs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index 9cc5eb6a8e90..e0c7720bd5da 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -113,6 +113,7 @@ static int com20020_probe(struct pcmcia_device *p_dev) struct com20020_dev *info; struct net_device *dev; struct arcnet_local *lp; + int ret = -ENOMEM; dev_dbg(&p_dev->dev, "com20020_attach()\n"); @@ -142,12 +143,18 @@ static int com20020_probe(struct pcmcia_device *p_dev) info->dev = dev; p_dev->priv = info; - return com20020_config(p_dev); + ret = com20020_config(p_dev); + if (ret) + goto fail_config; + return 0; + +fail_config: + free_arcdev(dev); fail_alloc_dev: kfree(info); fail_alloc_info: - return -ENOMEM; + return ret; } /* com20020_attach */ static void com20020_detach(struct pcmcia_device *link) From e14583073fc0f31b35b7de4eefbb1e2720914e0f Mon Sep 17 00:00:00 2001 From: Martin Faltesek Date: Mon, 21 Nov 2022 18:42:44 -0600 Subject: [PATCH 1016/1477] nfc: st-nci: fix incorrect validating logic in EVT_TRANSACTION [ Upstream commit c60c152230828825c06e62a8f1ce956d4b659266 ] The first validation check for EVT_TRANSACTION has two different checks tied together with logical AND. One is a check for minimum packet length, and the other is for a valid aid_tag. If either condition is true (fails), then an error should be triggered. The fix is to change && to ||. 
Reported-by: Denis Efremov Reviewed-by: Guenter Roeck Fixes: 5d1ceb7f5e56 ("NFC: st21nfcb: Add HCI transaction event support") Signed-off-by: Martin Faltesek Reviewed-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/nfc/st-nci/se.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index 807eae04c1e3..b1ee5a38f964 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -327,7 +327,7 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, * AID 81 5 to 16 * PARAMETERS 82 0 to 255 */ - if (skb->len < NFC_MIN_AID_LENGTH + 2 && + if (skb->len < NFC_MIN_AID_LENGTH + 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) return -EPROTO; From b034fe2a0800b93ca575ce0d947e3f30b344f87e Mon Sep 17 00:00:00 2001 From: Martin Faltesek Date: Mon, 21 Nov 2022 18:42:45 -0600 Subject: [PATCH 1017/1477] nfc: st-nci: fix memory leaks in EVT_TRANSACTION [ Upstream commit 440f2ae9c9f06e26f5dcea697a53717fc61a318c ] Error path does not free previously allocated memory. Add devm_kfree() to the failure path. 
Reported-by: Denis Efremov Reviewed-by: Guenter Roeck Fixes: 5d1ceb7f5e56 ("NFC: st21nfcb: Add HCI transaction event support") Signed-off-by: Martin Faltesek Reviewed-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/nfc/st-nci/se.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index b1ee5a38f964..37d397aae9b9 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -340,8 +340,10 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, /* Check next byte is PARAMETERS tag (82) */ if (skb->data[transaction->aid_len + 2] != - NFC_EVT_TRANSACTION_PARAMS_TAG) + NFC_EVT_TRANSACTION_PARAMS_TAG) { + devm_kfree(dev, transaction); return -EPROTO; + } transaction->params_len = skb->data[transaction->aid_len + 3]; memcpy(transaction->params, skb->data + From 11052f118879d44e9f954c35f32be461f77e96f4 Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Wed, 23 Nov 2022 16:22:36 +0800 Subject: [PATCH 1018/1477] net: thunderx: Fix the ACPI memory leak [ Upstream commit 661e5ebbafd26d9d2e3c749f5cf591e55c7364f5 ] The ACPI buffer memory (string.pointer) should be freed as the buffer is not used after returning from bgx_acpi_match_id(), free it to prevent memory leak. 
Fixes: 46b903a01c05 ("net, thunder, bgx: Add support to get MAC address from ACPI.") Signed-off-by: Yu Liao Link: https://lore.kernel.org/r/20221123082237.1220521-1-liaoyu15@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 8ff28ed04b7f..f0e48b9373d6 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1438,8 +1438,10 @@ static acpi_status bgx_acpi_match_id(acpi_handle handle, u32 lvl, return AE_OK; } - if (strncmp(string.pointer, bgx_sel, 4)) + if (strncmp(string.pointer, bgx_sel, 4)) { + kfree(string.pointer); return AE_OK; + } acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, bgx_acpi_register_phy, NULL, bgx, NULL); From 1d840c5d673d3942ae80ad1a15c3bfb572c2421f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 18 Nov 2022 13:05:39 +0100 Subject: [PATCH 1019/1477] s390/crashdump: fix TOD programmable field size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f44e07a8afdd713ddc1a8832c39372fe5dd86895 ] The size of the TOD programmable field was incorrectly increased from four to eight bytes with commit 1a2c5840acf9 ("s390/dump: cleanup CPU save area handling"). This leads to an elf notes section NT_S390_TODPREG which has a size of eight instead of four bytes in case of kdump, however even worse is that the contents is incorrect: it is supposed to contain only the contents of the TOD programmable field, but in fact contains a mix of the TOD programmable field (32 bit upper bits) and parts of the CPU timer register (lower 32 bits). Fix this by simply changing the size of the todpreg field within the save area structure. 
This will implicitly also fix the size of the corresponding elf notes sections. This also gets rid of this compile time warning: in function ‘fortify_memcpy_chk’, inlined from ‘save_area_add_regs’ at arch/s390/kernel/crash_dump.c:99:2: ./include/linux/fortify-string.h:413:25: error: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Werror=attribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 1a2c5840acf9 ("s390/dump: cleanup CPU save area handling") Reviewed-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- arch/s390/kernel/crash_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 76762dc67ca9..f292c3e10671 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -44,7 +44,7 @@ struct save_area { u64 fprs[16]; u32 fpc; u32 prefix; - u64 todpreg; + u32 todpreg; u64 timer; u64 todcmp; u64 vxrs_low[16]; From 1f080b8caae9bb59d5db89509f41e17f6d41edd2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 10 May 2022 19:36:14 +0300 Subject: [PATCH 1020/1477] net: enetc: manage ENETC_F_QBV in priv->active_offloads only when enabled [ Upstream commit 32bf8e1f6fb9f6dc334b2b98dffc2e5dcd51e513 ] Future work in this driver would like to look at priv->active_offloads & ENETC_F_QBV to determine whether a tc-taprio qdisc offload was installed, but this does not produce the intended effect. All the other flags in priv->active_offloads are managed dynamically, except ENETC_F_QBV which is set statically based on the probed SI capability. This change makes priv->active_offloads & ENETC_F_QBV really track the presence of a tc-taprio schedule on the port. 
Some existing users, like the enetc_sched_speed_set() call from phylink_mac_link_up(), are best kept using the old logic: the tc-taprio offload does not re-trigger another link mode resolve, so the scheduler needs to be functional from the get go, as long as Qbv is supported at all on the port. So to preserve functionality there, look at the static station interface capability from pf->si->hw_features instead. Signed-off-by: Vladimir Oltean Reviewed-by: Claudiu Manoil Signed-off-by: Jakub Kicinski Stable-dep-of: 290b5fe096e7 ("net: enetc: preserve TX ring priority across reconfiguration") Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 6 ++---- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 6 ++++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 6904e10dd46b..515db7e6e649 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -748,9 +748,6 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->priv_flags |= IFF_UNICAST_FLT; - if (si->hw_features & ENETC_SI_F_QBV) - priv->active_offloads |= ENETC_F_QBV; - if (si->hw_features & ENETC_SI_F_PSFP && !enetc_psfp_enable(priv)) { priv->active_offloads |= ENETC_F_QCI; ndev->features |= NETIF_F_HW_TC; @@ -996,7 +993,8 @@ static void enetc_pl_mac_link_up(struct phylink_config *config, struct enetc_ndev_priv *priv; priv = netdev_priv(pf->si->ndev); - if (priv->active_offloads & ENETC_F_QBV) + + if (pf->si->hw_features & ENETC_SI_F_QBV) enetc_sched_speed_set(priv, speed); if (!phylink_autoneg_inband(mode) && diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 62efe1aebf86..b0e278e1f4ad 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -69,6 
+69,9 @@ static int enetc_setup_taprio(struct net_device *ndev, enetc_wr(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET, tge & (~ENETC_QBV_TGE)); + + priv->active_offloads &= ~ENETC_F_QBV; + return 0; } @@ -135,6 +138,9 @@ static int enetc_setup_taprio(struct net_device *ndev, dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_TO_DEVICE); kfree(gcl_data); + if (!err) + priv->active_offloads |= ENETC_F_QBV; + return err; } From de4dd4f9b3f648e07a2c3cc7115b655e02ac3672 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 28 Sep 2022 12:52:02 +0300 Subject: [PATCH 1021/1477] net: enetc: cache accesses to &priv->si->hw [ Upstream commit 715bf2610f1d1adf3d4f9b7b3dd729984ec4270a ] The &priv->si->hw construct dereferences 2 pointers and makes lines longer than they need to be, in turn making the code harder to read. Replace &priv->si->hw accesses with a "hw" variable when there are 2 or more accesses within a function that dereference this. This includes loops, since &priv->si->hw is a loop invariant. 
Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski Stable-dep-of: 290b5fe096e7 ("net: enetc: preserve TX ring priority across reconfiguration") Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/enetc/enetc.c | 28 +++++---- drivers/net/ethernet/freescale/enetc/enetc.h | 9 +-- .../net/ethernet/freescale/enetc/enetc_qos.c | 60 +++++++++---------- 3 files changed, 49 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index ca62c72eb772..65fa21776a98 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1272,13 +1272,14 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) static void enetc_setup_bdrs(struct enetc_ndev_priv *priv) { + struct enetc_hw *hw = &priv->si->hw; int i; for (i = 0; i < priv->num_tx_rings; i++) - enetc_setup_txbdr(&priv->si->hw, priv->tx_ring[i]); + enetc_setup_txbdr(hw, priv->tx_ring[i]); for (i = 0; i < priv->num_rx_rings; i++) - enetc_setup_rxbdr(&priv->si->hw, priv->rx_ring[i]); + enetc_setup_rxbdr(hw, priv->rx_ring[i]); } static void enetc_clear_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) @@ -1311,13 +1312,14 @@ static void enetc_clear_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) static void enetc_clear_bdrs(struct enetc_ndev_priv *priv) { + struct enetc_hw *hw = &priv->si->hw; int i; for (i = 0; i < priv->num_tx_rings; i++) - enetc_clear_txbdr(&priv->si->hw, priv->tx_ring[i]); + enetc_clear_txbdr(hw, priv->tx_ring[i]); for (i = 0; i < priv->num_rx_rings; i++) - enetc_clear_rxbdr(&priv->si->hw, priv->rx_ring[i]); + enetc_clear_rxbdr(hw, priv->rx_ring[i]); udelay(1); } @@ -1325,13 +1327,13 @@ static void enetc_clear_bdrs(struct enetc_ndev_priv *priv) static int enetc_setup_irqs(struct enetc_ndev_priv *priv) { struct pci_dev *pdev = priv->si->pdev; + struct enetc_hw *hw = &priv->si->hw; int i, j, err; for (i = 0; i < priv->bdr_int_num; 
i++) { int irq = pci_irq_vector(pdev, ENETC_BDR_INT_BASE_IDX + i); struct enetc_int_vector *v = priv->int_vector[i]; int entry = ENETC_BDR_INT_BASE_IDX + i; - struct enetc_hw *hw = &priv->si->hw; snprintf(v->name, sizeof(v->name), "%s-rxtx%d", priv->ndev->name, i); @@ -1419,13 +1421,14 @@ static void enetc_setup_interrupts(struct enetc_ndev_priv *priv) static void enetc_clear_interrupts(struct enetc_ndev_priv *priv) { + struct enetc_hw *hw = &priv->si->hw; int i; for (i = 0; i < priv->num_tx_rings; i++) - enetc_txbdr_wr(&priv->si->hw, i, ENETC_TBIER, 0); + enetc_txbdr_wr(hw, i, ENETC_TBIER, 0); for (i = 0; i < priv->num_rx_rings; i++) - enetc_rxbdr_wr(&priv->si->hw, i, ENETC_RBIER, 0); + enetc_rxbdr_wr(hw, i, ENETC_RBIER, 0); } static int enetc_phylink_connect(struct net_device *ndev) @@ -1565,6 +1568,7 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct tc_mqprio_qopt *mqprio = type_data; + struct enetc_hw *hw = &priv->si->hw; struct enetc_bdr *tx_ring; u8 num_tc; int i; @@ -1579,7 +1583,7 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) /* Reset all ring priorities to 0 */ for (i = 0; i < priv->num_tx_rings; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, 0); + enetc_set_bdr_prio(hw, tx_ring->index, 0); } return 0; @@ -1598,7 +1602,7 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) */ for (i = 0; i < num_tc; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, i); + enetc_set_bdr_prio(hw, tx_ring->index, i); } /* Reset the number of netdev queues based on the TC count */ @@ -1679,19 +1683,21 @@ static int enetc_set_rss(struct net_device *ndev, int en) static void enetc_enable_rxvlan(struct net_device *ndev, bool en) { struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; int i; for (i = 0; i < priv->num_rx_rings; 
i++) - enetc_bdr_enable_rxvlan(&priv->si->hw, i, en); + enetc_bdr_enable_rxvlan(hw, i, en); } static void enetc_enable_txvlan(struct net_device *ndev, bool en) { struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; int i; for (i = 0; i < priv->num_tx_rings; i++) - enetc_bdr_enable_txvlan(&priv->si->hw, i, en); + enetc_bdr_enable_txvlan(hw, i, en); } void enetc_set_features(struct net_device *ndev, netdev_features_t features) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 00386c5d3cde..38d8ea48b931 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -338,19 +338,20 @@ int enetc_set_psfp(struct net_device *ndev, bool en); static inline void enetc_get_max_cap(struct enetc_ndev_priv *priv) { + struct enetc_hw *hw = &priv->si->hw; u32 reg; - reg = enetc_port_rd(&priv->si->hw, ENETC_PSIDCAPR); + reg = enetc_port_rd(hw, ENETC_PSIDCAPR); priv->psfp_cap.max_streamid = reg & ENETC_PSIDCAPR_MSK; /* Port stream filter capability */ - reg = enetc_port_rd(&priv->si->hw, ENETC_PSFCAPR); + reg = enetc_port_rd(hw, ENETC_PSFCAPR); priv->psfp_cap.max_psfp_filter = reg & ENETC_PSFCAPR_MSK; /* Port stream gate capability */ - reg = enetc_port_rd(&priv->si->hw, ENETC_PSGCAPR); + reg = enetc_port_rd(hw, ENETC_PSGCAPR); priv->psfp_cap.max_psfp_gate = (reg & ENETC_PSGCAPR_SGIT_MSK); priv->psfp_cap.max_psfp_gatelist = (reg & ENETC_PSGCAPR_GCL_MSK) >> 16; /* Port flow meter capability */ - reg = enetc_port_rd(&priv->si->hw, ENETC_PFMCAPR); + reg = enetc_port_rd(hw, ENETC_PFMCAPR); priv->psfp_cap.max_psfp_meter = reg & ENETC_PFMCAPR_MSK; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index b0e278e1f4ad..d3a6367548a1 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -17,8 +17,9 @@ static u16 
enetc_get_max_gcl_len(struct enetc_hw *hw) void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed) { + struct enetc_hw *hw = &priv->si->hw; u32 old_speed = priv->speed; - u32 pspeed; + u32 pspeed, tmp; if (speed == old_speed) return; @@ -39,16 +40,15 @@ void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed) } priv->speed = speed; - enetc_port_wr(&priv->si->hw, ENETC_PMR, - (enetc_port_rd(&priv->si->hw, ENETC_PMR) - & (~ENETC_PMR_PSPEED_MASK)) - | pspeed); + tmp = enetc_port_rd(hw, ENETC_PMR); + enetc_port_wr(hw, ENETC_PMR, (tmp & ~ENETC_PMR_PSPEED_MASK) | pspeed); } static int enetc_setup_taprio(struct net_device *ndev, struct tc_taprio_qopt_offload *admin_conf) { struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; struct enetc_cbd cbd = {.cmd = 0}; struct tgs_gcl_conf *gcl_config; struct tgs_gcl_data *gcl_data; @@ -60,15 +60,13 @@ static int enetc_setup_taprio(struct net_device *ndev, int err; int i; - if (admin_conf->num_entries > enetc_get_max_gcl_len(&priv->si->hw)) + if (admin_conf->num_entries > enetc_get_max_gcl_len(hw)) return -EINVAL; gcl_len = admin_conf->num_entries; - tge = enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET); + tge = enetc_rd(hw, ENETC_QBV_PTGCR_OFFSET); if (!admin_conf->enable) { - enetc_wr(&priv->si->hw, - ENETC_QBV_PTGCR_OFFSET, - tge & (~ENETC_QBV_TGE)); + enetc_wr(hw, ENETC_QBV_PTGCR_OFFSET, tge & ~ENETC_QBV_TGE); priv->active_offloads &= ~ENETC_F_QBV; @@ -126,14 +124,11 @@ static int enetc_setup_taprio(struct net_device *ndev, cbd.cls = BDCR_CMD_PORT_GCL; cbd.status_flags = 0; - enetc_wr(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET, - tge | ENETC_QBV_TGE); + enetc_wr(hw, ENETC_QBV_PTGCR_OFFSET, tge | ENETC_QBV_TGE); err = enetc_send_cmd(priv->si, &cbd); if (err) - enetc_wr(&priv->si->hw, - ENETC_QBV_PTGCR_OFFSET, - tge & (~ENETC_QBV_TGE)); + enetc_wr(hw, ENETC_QBV_PTGCR_OFFSET, tge & ~ENETC_QBV_TGE); dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_TO_DEVICE); 
kfree(gcl_data); @@ -148,6 +143,7 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) { struct tc_taprio_qopt_offload *taprio = type_data; struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; int err; int i; @@ -157,16 +153,14 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) return -EBUSY; for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(&priv->si->hw, - priv->tx_ring[i]->index, + enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, taprio->enable ? i : 0); err = enetc_setup_taprio(ndev, taprio); if (err) for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(&priv->si->hw, - priv->tx_ring[i]->index, + enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, taprio->enable ? 0 : i); return err; @@ -188,7 +182,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) struct tc_cbs_qopt_offload *cbs = type_data; u32 port_transmit_rate = priv->speed; u8 tc_nums = netdev_get_num_tc(ndev); - struct enetc_si *si = priv->si; + struct enetc_hw *hw = &priv->si->hw; u32 hi_credit_bit, hi_credit_reg; u32 max_interference_size; u32 port_frame_max_size; @@ -209,15 +203,15 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) * lower than this TC have been disabled. */ if (tc == prio_top && - enetc_get_cbs_enable(&si->hw, prio_next)) { + enetc_get_cbs_enable(hw, prio_next)) { dev_err(&ndev->dev, "Disable TC%d before disable TC%d\n", prio_next, tc); return -EINVAL; } - enetc_port_wr(&si->hw, ENETC_PTCCBSR1(tc), 0); - enetc_port_wr(&si->hw, ENETC_PTCCBSR0(tc), 0); + enetc_port_wr(hw, ENETC_PTCCBSR1(tc), 0); + enetc_port_wr(hw, ENETC_PTCCBSR0(tc), 0); return 0; } @@ -234,13 +228,13 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) * higher than this TC have been enabled. 
*/ if (tc == prio_next) { - if (!enetc_get_cbs_enable(&si->hw, prio_top)) { + if (!enetc_get_cbs_enable(hw, prio_top)) { dev_err(&ndev->dev, "Enable TC%d first before enable TC%d\n", prio_top, prio_next); return -EINVAL; } - bw_sum += enetc_get_cbs_bw(&si->hw, prio_top); + bw_sum += enetc_get_cbs_bw(hw, prio_top); } if (bw_sum + bw >= 100) { @@ -249,7 +243,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) return -EINVAL; } - enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc)); + enetc_port_rd(hw, ENETC_PTCMSDUR(tc)); /* For top prio TC, the max_interfrence_size is maxSizedFrame. * @@ -269,8 +263,8 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) u32 m0, ma, r0, ra; m0 = port_frame_max_size * 8; - ma = enetc_port_rd(&si->hw, ENETC_PTCMSDUR(prio_top)) * 8; - ra = enetc_get_cbs_bw(&si->hw, prio_top) * + ma = enetc_port_rd(hw, ENETC_PTCMSDUR(prio_top)) * 8; + ra = enetc_get_cbs_bw(hw, prio_top) * port_transmit_rate * 10000ULL; r0 = port_transmit_rate * 1000000ULL; max_interference_size = m0 + ma + @@ -290,10 +284,10 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) hi_credit_reg = (u32)div_u64((ENETC_CLK * 100ULL) * hi_credit_bit, port_transmit_rate * 1000000ULL); - enetc_port_wr(&si->hw, ENETC_PTCCBSR1(tc), hi_credit_reg); + enetc_port_wr(hw, ENETC_PTCCBSR1(tc), hi_credit_reg); /* Set bw register and enable this traffic class */ - enetc_port_wr(&si->hw, ENETC_PTCCBSR0(tc), bw | ENETC_CBSE); + enetc_port_wr(hw, ENETC_PTCCBSR0(tc), bw | ENETC_CBSE); return 0; } @@ -303,6 +297,7 @@ int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data) struct enetc_ndev_priv *priv = netdev_priv(ndev); struct tc_etf_qopt_offload *qopt = type_data; u8 tc_nums = netdev_get_num_tc(ndev); + struct enetc_hw *hw = &priv->si->hw; int tc; if (!tc_nums) @@ -318,12 +313,11 @@ int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data) return -EBUSY; /* TSD and Qbv are mutually exclusive in hardware */ - if (enetc_rd(&priv->si->hw, 
ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE) + if (enetc_rd(hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE) return -EBUSY; priv->tx_ring[tc]->tsd_enable = qopt->enable; - enetc_port_wr(&priv->si->hw, ENETC_PTCTSDR(tc), - qopt->enable ? ENETC_TSDE : 0); + enetc_port_wr(hw, ENETC_PTCTSDR(tc), qopt->enable ? ENETC_TSDE : 0); return 0; } From c0cf8bc259e002d1518ad4ae1c8829e44c8d415e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 22 Nov 2022 15:09:36 +0200 Subject: [PATCH 1022/1477] net: enetc: preserve TX ring priority across reconfiguration [ Upstream commit 290b5fe096e7dd0aad730d1af4f7f2d9fea43e11 ] In the blamed commit, a rudimentary reallocation procedure for RX buffer descriptors was implemented, for the situation when their format changes between normal (no PTP) and extended (PTP). enetc_hwtstamp_set() calls enetc_close() and enetc_open() in a sequence, and this sequence loses information which was previously configured in the TX BDR Mode Register, specifically via the enetc_set_bdr_prio() call. The TX ring priority is configured by tc-mqprio and tc-taprio, and affects important things for TSN such as the TX time of packets. The issue manifests itself most visibly by the fact that isochron --txtime reports premature packet transmissions when PTP is first enabled on an enetc interface. Save the TX ring priority in a new field in struct enetc_bdr (occupies a 2 byte hole on arm64) in order to make this survive a ring reconfiguration. 
Fixes: 434cebabd3a2 ("enetc: Add dynamic allocation of extended Rx BD rings") Signed-off-by: Vladimir Oltean Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/r/20221122130936.1704151-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/enetc/enetc.c | 8 ++++--- drivers/net/ethernet/freescale/enetc/enetc.h | 1 + .../net/ethernet/freescale/enetc/enetc_qos.c | 21 ++++++++++++------- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 65fa21776a98..975762ccb66f 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1212,7 +1212,7 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) /* enable Tx ints by setting pkt thr to 1 */ enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1); - tbmr = ENETC_TBMR_EN; + tbmr = ENETC_TBMR_EN | ENETC_TBMR_SET_PRIO(tx_ring->prio); if (tx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_TX) tbmr |= ENETC_TBMR_VIH; @@ -1583,7 +1583,8 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) /* Reset all ring priorities to 0 */ for (i = 0; i < priv->num_tx_rings; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(hw, tx_ring->index, 0); + tx_ring->prio = 0; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); } return 0; @@ -1602,7 +1603,8 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) */ for (i = 0; i < num_tc; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(hw, tx_ring->index, i); + tx_ring->prio = i; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); } /* Reset the number of netdev queues based on the TC count */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 38d8ea48b931..725c3d1cbb19 100644 --- 
a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -58,6 +58,7 @@ struct enetc_bdr { void __iomem *rcir; }; u16 index; + u16 prio; int bd_count; /* # of BDs */ int next_to_use; int next_to_clean; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index d3a6367548a1..5841721c8119 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -144,6 +144,7 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) struct tc_taprio_qopt_offload *taprio = type_data; struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; + struct enetc_bdr *tx_ring; int err; int i; @@ -152,16 +153,20 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) if (priv->tx_ring[i]->tsd_enable) return -EBUSY; - for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, - taprio->enable ? i : 0); + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + tx_ring->prio = taprio->enable ? i : 0; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } err = enetc_setup_taprio(ndev, taprio); - - if (err) - for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, - taprio->enable ? 0 : i); + if (err) { + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + tx_ring->prio = taprio->enable ? 0 : i; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } + } return err; } From d21d26e65b5f6d14c453f924d1edd689ad7b855e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 20 Sep 2022 19:06:33 +0200 Subject: [PATCH 1023/1477] lib/vdso: use "grep -E" instead of "egrep" commit 8ac3b5cd3e0521d92f9755e90d140382fc292510 upstream. 
The latest version of grep claims the egrep is now obsolete so the build now contains warnings that look like: egrep: warning: egrep is obsolescent; using grep -E fix this up by moving the vdso Makefile to use "grep -E" instead. Cc: Andy Lutomirski Cc: Thomas Gleixner Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20220920170633.3133829-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- lib/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile index c415a685d61b..e814061d6aa0 100644 --- a/lib/vdso/Makefile +++ b/lib/vdso/Makefile @@ -17,6 +17,6 @@ $(error ARCH_REL_TYPE_ABS is not set) endif quiet_cmd_vdso_check = VDSOCHK $@ - cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \ + cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \ then (echo >&2 "$@: dynamic relocations are not supported"; \ rm -f $@; /bin/false); fi From 350e98a08af1cfa5cc870c93a87a6d7011261836 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 10 Nov 2022 16:41:31 +0100 Subject: [PATCH 1024/1477] usb: dwc3: exynos: Fix remove() function commit e0481e5b3cc12ea7ccf4552d41518c89d3509004 upstream. The core DWC3 device node was not properly removed by the custom dwc3_exynos_remove_child() function. Replace it with generic of_platform_depopulate() which does that job right. 
Fixes: adcf20dcd262 ("usb: dwc3: exynos: Use of_platform API to create dwc3 core pdev") Signed-off-by: Marek Szyprowski Acked-by: Thinh Nguyen Cc: stable@vger.kernel.org Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20221110154131.2577-1-m.szyprowski@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-exynos.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c index 90bb022737da..ee7b71827216 100644 --- a/drivers/usb/dwc3/dwc3-exynos.c +++ b/drivers/usb/dwc3/dwc3-exynos.c @@ -37,15 +37,6 @@ struct dwc3_exynos { struct regulator *vdd10; }; -static int dwc3_exynos_remove_child(struct device *dev, void *unused) -{ - struct platform_device *pdev = to_platform_device(dev); - - platform_device_unregister(pdev); - - return 0; -} - static int dwc3_exynos_probe(struct platform_device *pdev) { struct dwc3_exynos *exynos; @@ -142,7 +133,7 @@ static int dwc3_exynos_remove(struct platform_device *pdev) struct dwc3_exynos *exynos = platform_get_drvdata(pdev); int i; - device_for_each_child(&pdev->dev, NULL, dwc3_exynos_remove_child); + of_platform_depopulate(&pdev->dev); for (i = exynos->num_clks - 1; i >= 0; i--) clk_disable_unprepare(exynos->clks[i]); From 86ba9c859577a7f3a6320b328e74b18d8b5bc025 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 22 Sep 2022 20:04:34 +0800 Subject: [PATCH 1025/1477] ext4: fix use-after-free in ext4_ext_shift_extents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f6b1a1cf1c3ee430d3f5e47847047ce789a690aa upstream. 
If the starting position of our insert range happens to be in the hole between the two ext4_extent_idx, because the lblk of the ext4_extent in the previous ext4_extent_idx is always less than the start, which leads to the "extent" variable access across the boundary, the following UAF is triggered: ================================================================== BUG: KASAN: use-after-free in ext4_ext_shift_extents+0x257/0x790 Read of size 4 at addr ffff88819807a008 by task fallocate/8010 CPU: 3 PID: 8010 Comm: fallocate Tainted: G E 5.10.0+ #492 Call Trace: dump_stack+0x7d/0xa3 print_address_description.constprop.0+0x1e/0x220 kasan_report.cold+0x67/0x7f ext4_ext_shift_extents+0x257/0x790 ext4_insert_range+0x5b6/0x700 ext4_fallocate+0x39e/0x3d0 vfs_fallocate+0x26f/0x470 ksys_fallocate+0x3a/0x70 __x64_sys_fallocate+0x4f/0x60 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ================================================================== For right shifts, we can divide them into the following situations: 1. When the first ee_block of ext4_extent_idx is greater than or equal to start, make right shifts directly from the first ee_block. 1) If it is greater than start, we need to continue searching in the previous ext4_extent_idx. 2) If it is equal to start, we can exit the loop (iterator=NULL). 2. When the first ee_block of ext4_extent_idx is less than start, then traverse from the last extent to find the first extent whose ee_block is less than start. 1) If extent is still the last extent after traversal, it means that the last ee_block of ext4_extent_idx is less than start, that is, start is located in the hole between idx and (idx+1), so we can exit the loop directly (break) without right shifts. 2) Otherwise, make right shifts at the corresponding position of the found extent, and then exit the loop (iterator=NULL). 
Fixes: 331573febb6a ("ext4: Add support FALLOC_FL_INSERT_RANGE for fallocate") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Zhihao Cheng Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20220922120434.1294789-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 30add5a3df3d..54750b7c162d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5182,6 +5182,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * and it is decreased till we reach start. */ again: + ret = 0; if (SHIFT == SHIFT_LEFT) iterator = &start; else @@ -5225,14 +5226,21 @@ again: ext4_ext_get_actual_len(extent); } else { extent = EXT_FIRST_EXTENT(path[depth].p_hdr); - if (le32_to_cpu(extent->ee_block) > 0) + if (le32_to_cpu(extent->ee_block) > start) *iterator = le32_to_cpu(extent->ee_block) - 1; - else - /* Beginning is reached, end of the loop */ + else if (le32_to_cpu(extent->ee_block) == start) iterator = NULL; - /* Update path extent in case we need to stop */ - while (le32_to_cpu(extent->ee_block) < start) + else { + extent = EXT_LAST_EXTENT(path[depth].p_hdr); + while (le32_to_cpu(extent->ee_block) >= start) + extent--; + + if (extent == EXT_LAST_EXTENT(path[depth].p_hdr)) + break; + extent++; + iterator = NULL; + } path[depth].p_ext = extent; } ret = ext4_ext_shift_path_extents(path, shift, inode, From bd1b8041c2f6de086ed4b1b25e373e9ab8f62d35 Mon Sep 17 00:00:00 2001 From: Jakob Unterwurzacher Date: Wed, 19 Oct 2022 16:27:27 +0200 Subject: [PATCH 1026/1477] arm64: dts: rockchip: lower rk3399-puma-haikou SD controller clock frequency commit 91e8b74fe6381e083f8aa55217bb0562785ab398 upstream. CRC errors (code -84 EILSEQ) have been observed for some SanDisk Ultra A1 cards when running at 50MHz. 
Waveform analysis suggest that the level shifters that are used on the RK3399-Q7 module for voltage translation between 3.0 and 3.3V don't handle clock rates at or above 48MHz properly. Back off to 40MHz for some safety margin. Cc: stable@vger.kernel.org Fixes: 60fd9f72ce8a ("arm64: dts: rockchip: add Haikou baseboard with RK3399-Q7 SoM") Signed-off-by: Jakob Unterwurzacher Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20221019-upstream-puma-sd-40mhz-v1-0-754a76421518@theobroma-systems.com Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index a8d363568fd6..3fc761c8d550 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -203,7 +203,7 @@ cap-sd-highspeed; cd-gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_LOW>; disable-wp; - max-frequency = <150000000>; + max-frequency = <40000000>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; vmmc-supply = <&vcc3v3_baseboard>; From fa9efcbfbf77a9b727271935ba99385a4f2d36ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Concepci=C3=B3n=20Rodr=C3=ADguez?= Date: Sun, 6 Nov 2022 01:56:51 +0000 Subject: [PATCH 1027/1477] iio: light: apds9960: fix wrong register for gesture gain commit 0aa60ff5d996d4ecdd4a62699c01f6d00f798d59 upstream. Gesture Gain Control is in REG_GCONF_2 (0xa3), not in REG_CONFIG_2 (0x90). 
Fixes: aff268cd532e ("iio: light: add APDS9960 ALS + promixity driver") Signed-off-by: Alejandro Concepcion-Rodriguez Acked-by: Matt Ranostay Cc: Link: https://lore.kernel.org/r/EaT-NKC-H4DNX5z4Lg9B6IWPD5TrTrYBr5DYB784wfDKQkTmzPXkoYqyUOrOgJH-xvTsEkFLcVkeAPZRUODEFI5dGziaWXwjpfBNLeNGfNc=@acoro.eu Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/light/apds9960.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c index 9afb3fcc74e6..4a7ccf268ebf 100644 --- a/drivers/iio/light/apds9960.c +++ b/drivers/iio/light/apds9960.c @@ -53,9 +53,6 @@ #define APDS9960_REG_CONTROL_PGAIN_MASK_SHIFT 2 #define APDS9960_REG_CONFIG_2 0x90 -#define APDS9960_REG_CONFIG_2_GGAIN_MASK 0x60 -#define APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT 5 - #define APDS9960_REG_ID 0x92 #define APDS9960_REG_STATUS 0x93 @@ -76,6 +73,9 @@ #define APDS9960_REG_GCONF_1_GFIFO_THRES_MASK_SHIFT 6 #define APDS9960_REG_GCONF_2 0xa3 +#define APDS9960_REG_GCONF_2_GGAIN_MASK 0x60 +#define APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT 5 + #define APDS9960_REG_GOFFSET_U 0xa4 #define APDS9960_REG_GOFFSET_D 0xa5 #define APDS9960_REG_GPULSE 0xa6 @@ -395,9 +395,9 @@ static int apds9960_set_pxs_gain(struct apds9960_data *data, int val) } ret = regmap_update_bits(data->regmap, - APDS9960_REG_CONFIG_2, - APDS9960_REG_CONFIG_2_GGAIN_MASK, - idx << APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT); + APDS9960_REG_GCONF_2, + APDS9960_REG_GCONF_2_GGAIN_MASK, + idx << APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT); if (!ret) data->pxs_gain = idx; mutex_unlock(&data->lock); From 456e895fd0b84069a25d316885acae40e62a46a2 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 8 Nov 2022 11:28:02 +0800 Subject: [PATCH 1028/1477] iio: core: Fix entry not deleted when iio_register_sw_trigger_type() fails commit 4ad09d956f8eacff61e67e5b13ba8ebec3232f76 upstream. 
In iio_register_sw_trigger_type(), configfs_register_default_group() can fail, but the entry added to iio_trigger_types_list is not deleted in that case. This leaves a dangling entry in iio_trigger_types_list, which can cause a page fault when the module is loaded again. So fix this by calling list_del(&t->list) in the error path. BUG: unable to handle page fault for address: fffffbfff81d7400 Call Trace: iio_register_sw_trigger_type do_one_initcall do_init_module load_module ... Fixes: b662f809d410 ("iio: core: Introduce IIO software triggers") Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221108032802.168623-1-chenzhongjin@huawei.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/industrialio-sw-trigger.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-sw-trigger.c b/drivers/iio/industrialio-sw-trigger.c index 9ae793a70b8b..a7714d32a641 100644 --- a/drivers/iio/industrialio-sw-trigger.c +++ b/drivers/iio/industrialio-sw-trigger.c @@ -58,8 +58,12 @@ int iio_register_sw_trigger_type(struct iio_sw_trigger_type *t) t->group = configfs_register_default_group(iio_triggers_group, t->name, &iio_trigger_type_group_type); - if (IS_ERR(t->group)) + if (IS_ERR(t->group)) { + mutex_lock(&iio_trigger_types_lock); + list_del(&t->list); + mutex_unlock(&iio_trigger_types_lock); ret = PTR_ERR(t->group); + } return ret; } From c0a9c9973d24f224cabccd5ffec1887a1d77fe2f Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 15 Nov 2022 12:01:58 +0100 Subject: [PATCH 1029/1477] init/Kconfig: fix CC_HAS_ASM_GOTO_TIED_OUTPUT test with dash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 534bd70374d646f17e2cebe0e6e4cdd478ce4f0c ] When using dash as /bin/sh, the CC_HAS_ASM_GOTO_TIED_OUTPUT test fails with a syntax error which is not the one we are looking for: : In function ‘foo’: :1:29: warning: missing terminating " character :1:29: error: missing
terminating " character :2:5: error: expected ‘:’ before ‘+’ token :2:7: warning: missing terminating " character :2:7: error: missing terminating " character :2:5: error: expected declaration or statement at end of input Removing '\n' solves this. Fixes: 1aa0e8b144b6 ("Kconfig: Add option for asm goto w/ tied outputs to workaround clang-13 bug") Signed-off-by: Alexandre Belloni Reviewed-by: Sean Christopherson Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 22912631d79b..eba883d6d9ed 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -71,7 +71,7 @@ config CC_HAS_ASM_GOTO_OUTPUT config CC_HAS_ASM_GOTO_TIED_OUTPUT depends on CC_HAS_ASM_GOTO_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. - def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .\n": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) + def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) From fbe955be268b0fc9205146789178231ad0ab4820 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Nov 2022 19:20:53 -0800 Subject: [PATCH 1030/1477] nios2: add FORCE for vmlinuz.gz [ Upstream commit 869e4ae4cd2a23d625aaa14ae62dbebf768cb77d ] Add FORCE to placate a warning from make: arch/nios2/boot/Makefile:24: FORCE prerequisite is missing Fixes: 2fc8483fdcde ("nios2: Build infrastructure") Signed-off-by: Randy Dunlap Reviewed-by: Masahiro Yamada Signed-off-by: Sasha Levin --- arch/nios2/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile index 37dfc7e584bc..0b704c1f379f 100644 --- a/arch/nios2/boot/Makefile 
+++ b/arch/nios2/boot/Makefile @@ -20,7 +20,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE $(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) -$(obj)/vmImage: $(obj)/vmlinux.gz +$(obj)/vmImage: $(obj)/vmlinux.gz FORCE $(call if_changed,uimage) @$(kecho) 'Kernel: $@ is ready' From 049194538cb85bae1f617dc055153f9afdab58df Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Wed, 27 Apr 2022 14:08:50 -0400 Subject: [PATCH 1031/1477] mmc: sdhci-brcmstb: Re-organize flags [ Upstream commit f3a70f991dd07330225ea11e158e1d07ad5733fb ] Re-organize the flags by basing the bit names on the flag that they apply to. Also change the "flags" member in the "brcmstb_match_priv" struct to const. Signed-off-by: Al Cooper Signed-off-by: Kamal Dasu Acked-by: Florian Fainelli Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20220427180853.35970-2-kdasu.kdev@gmail.com Signed-off-by: Ulf Hansson Stable-dep-of: 56baa208f910 ("mmc: sdhci-brcmstb: Fix SDHCI_RESET_ALL for CQHCI") Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-brcmstb.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c index f24623aac2db..244780481193 100644 --- a/drivers/mmc/host/sdhci-brcmstb.c +++ b/drivers/mmc/host/sdhci-brcmstb.c @@ -18,20 +18,22 @@ #define SDHCI_VENDOR 0x78 #define SDHCI_VENDOR_ENHANCED_STRB 0x1 -#define BRCMSTB_PRIV_FLAGS_NO_64BIT BIT(0) -#define BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT BIT(1) +#define BRCMSTB_MATCH_FLAGS_NO_64BIT BIT(0) +#define BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT BIT(1) + +#define BRCMSTB_PRIV_FLAGS_HAS_CQE BIT(0) #define SDHCI_ARASAN_CQE_BASE_ADDR 0x200 struct sdhci_brcmstb_priv { void __iomem *cfg_regs; - bool has_cqe; + unsigned int flags; }; struct brcmstb_match_priv { void (*hs400es)(struct mmc_host *mmc, struct mmc_ios *ios); struct sdhci_ops *ops; - unsigned int flags; + const unsigned int flags; }; static void sdhci_brcmstb_hs400es(struct mmc_host 
*mmc, struct mmc_ios *ios) @@ -134,13 +136,13 @@ static struct sdhci_ops sdhci_brcmstb_ops_7216 = { }; static struct brcmstb_match_priv match_priv_7425 = { - .flags = BRCMSTB_PRIV_FLAGS_NO_64BIT | - BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT, + .flags = BRCMSTB_MATCH_FLAGS_NO_64BIT | + BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT, .ops = &sdhci_brcmstb_ops, }; static struct brcmstb_match_priv match_priv_7445 = { - .flags = BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT, + .flags = BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT, .ops = &sdhci_brcmstb_ops, }; @@ -176,7 +178,7 @@ static int sdhci_brcmstb_add_host(struct sdhci_host *host, bool dma64; int ret; - if (!priv->has_cqe) + if ((priv->flags & BRCMSTB_PRIV_FLAGS_HAS_CQE) == 0) return sdhci_add_host(host); dev_dbg(mmc_dev(host->mmc), "CQE is enabled\n"); @@ -225,7 +227,6 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) struct sdhci_brcmstb_priv *priv; struct sdhci_host *host; struct resource *iomem; - bool has_cqe = false; struct clk *clk; int res; @@ -244,10 +245,6 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) return res; memset(&brcmstb_pdata, 0, sizeof(brcmstb_pdata)); - if (device_property_read_bool(&pdev->dev, "supports-cqe")) { - has_cqe = true; - match_priv->ops->irq = sdhci_brcmstb_cqhci_irq; - } brcmstb_pdata.ops = match_priv->ops; host = sdhci_pltfm_init(pdev, &brcmstb_pdata, sizeof(struct sdhci_brcmstb_priv)); @@ -258,7 +255,10 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) pltfm_host = sdhci_priv(host); priv = sdhci_pltfm_priv(pltfm_host); - priv->has_cqe = has_cqe; + if (device_property_read_bool(&pdev->dev, "supports-cqe")) { + priv->flags |= BRCMSTB_PRIV_FLAGS_HAS_CQE; + match_priv->ops->irq = sdhci_brcmstb_cqhci_irq; + } /* Map in the non-standard CFG registers */ iomem = platform_get_resource(pdev, IORESOURCE_MEM, 1); @@ -287,14 +287,14 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) * properties through mmc_of_parse(). 
*/ host->caps = sdhci_readl(host, SDHCI_CAPABILITIES); - if (match_priv->flags & BRCMSTB_PRIV_FLAGS_NO_64BIT) + if (match_priv->flags & BRCMSTB_MATCH_FLAGS_NO_64BIT) host->caps &= ~SDHCI_CAN_64BIT; host->caps1 = sdhci_readl(host, SDHCI_CAPABILITIES_1); host->caps1 &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_DDR50); host->quirks |= SDHCI_QUIRK_MISSING_CAPS; - if (match_priv->flags & BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT) + if (match_priv->flags & BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT) host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; res = sdhci_brcmstb_add_host(host, priv); From b5d770977b1846dc74cb78f4dc48c9c0c40a786d Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Wed, 27 Apr 2022 14:08:51 -0400 Subject: [PATCH 1032/1477] mmc: sdhci-brcmstb: Enable Clock Gating to save power [ Upstream commit 6bcc55fe648b860ef0c2b8dc23adc05bcddb93c2 ] Enabling this feature will allow the controller to stop the bus clock when the bus is idle. The feature is not part of the standard and is unique to newer Arasan cores and is enabled with a bit in a vendor specific register. This feature will only be enabled for non-removable devices because they don't switch the voltage and clock gating breaks SD Card voltage switching.
Signed-off-by: Al Cooper Signed-off-by: Kamal Dasu Acked-by: Florian Fainelli Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20220427180853.35970-3-kdasu.kdev@gmail.com Signed-off-by: Ulf Hansson Stable-dep-of: 56baa208f910 ("mmc: sdhci-brcmstb: Fix SDHCI_RESET_ALL for CQHCI") Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-brcmstb.c | 35 +++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c index 244780481193..683d0c685748 100644 --- a/drivers/mmc/host/sdhci-brcmstb.c +++ b/drivers/mmc/host/sdhci-brcmstb.c @@ -17,11 +17,14 @@ #define SDHCI_VENDOR 0x78 #define SDHCI_VENDOR_ENHANCED_STRB 0x1 +#define SDHCI_VENDOR_GATE_SDCLK_EN 0x2 #define BRCMSTB_MATCH_FLAGS_NO_64BIT BIT(0) #define BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT BIT(1) +#define BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE BIT(2) #define BRCMSTB_PRIV_FLAGS_HAS_CQE BIT(0) +#define BRCMSTB_PRIV_FLAGS_GATE_CLOCK BIT(1) #define SDHCI_ARASAN_CQE_BASE_ADDR 0x200 @@ -36,6 +39,27 @@ struct brcmstb_match_priv { const unsigned int flags; }; +static inline void enable_clock_gating(struct sdhci_host *host) +{ + u32 reg; + + reg = sdhci_readl(host, SDHCI_VENDOR); + reg |= SDHCI_VENDOR_GATE_SDCLK_EN; + sdhci_writel(host, reg, SDHCI_VENDOR); +} + +void brcmstb_reset(struct sdhci_host *host, u8 mask) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_brcmstb_priv *priv = sdhci_pltfm_priv(pltfm_host); + + sdhci_reset(host, mask); + + /* Reset will clear this, so re-enable it */ + if (priv->flags & BRCMSTB_PRIV_FLAGS_GATE_CLOCK) + enable_clock_gating(host); +} + static void sdhci_brcmstb_hs400es(struct mmc_host *mmc, struct mmc_ios *ios) { struct sdhci_host *host = mmc_priv(mmc); @@ -131,7 +155,7 @@ static struct sdhci_ops sdhci_brcmstb_ops = { static struct sdhci_ops sdhci_brcmstb_ops_7216 = { .set_clock = sdhci_brcmstb_set_clock, .set_bus_width = sdhci_set_bus_width, - .reset = sdhci_reset, + 
.reset = brcmstb_reset, .set_uhs_signaling = sdhci_brcmstb_set_uhs_signaling, }; @@ -147,6 +171,7 @@ static struct brcmstb_match_priv match_priv_7445 = { }; static const struct brcmstb_match_priv match_priv_7216 = { + .flags = BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE, .hs400es = sdhci_brcmstb_hs400es, .ops = &sdhci_brcmstb_ops_7216, }; @@ -273,6 +298,14 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) if (res) goto err; + /* + * Automatic clock gating does not work for SD cards that may + * voltage switch so only enable it for non-removable devices. + */ + if ((match_priv->flags & BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE) && + (host->mmc->caps & MMC_CAP_NONREMOVABLE)) + priv->flags |= BRCMSTB_PRIV_FLAGS_GATE_CLOCK; + /* * If the chip has enhanced strobe and it's enabled, add * callback From 57112da86b1bf8c87540ce82b6be6c99d1d50b23 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 26 Oct 2022 12:42:05 -0700 Subject: [PATCH 1033/1477] mmc: sdhci-brcmstb: Fix SDHCI_RESET_ALL for CQHCI [ Upstream commit 56baa208f91061ff27ec2d93fbc483f624d373b4 ] [[ NOTE: this is completely untested by the author, but included solely because, as noted in commit df57d73276b8 ("mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers"), "other drivers using CQHCI might benefit from a similar change, if they also have CQHCI reset by SDHCI_RESET_ALL." We've now seen the same bug on at least MSM, Arasan, and Intel hardware. ]] SDHCI_RESET_ALL resets will reset the hardware CQE state, but we aren't tracking that properly in software. When out of sync, we may trigger various timeouts. It's not typical to perform resets while CQE is enabled, but this may occur in some suspend or error recovery scenarios. Include this fix by way of the new sdhci_and_cqhci_reset() helper. 
I only patch the bcm7216 variant even though others potentially *could* provide the 'supports-cqe' property (and thus enable CQHCI), because d46ba2d17f90 ("mmc: sdhci-brcmstb: Add support for Command Queuing (CQE)") and some Broadcom folks confirm that only the 7216 variant actually supports it. This patch depends on (and should not compile without) the patch entitled "mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI". Fixes: d46ba2d17f90 ("mmc: sdhci-brcmstb: Add support for Command Queuing (CQE)") Signed-off-by: Brian Norris Reviewed-by: Florian Fainelli Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221026124150.v4.3.I6a715feab6d01f760455865e968ecf0d85036018@changeid Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-brcmstb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c index 683d0c685748..4d42b1810ace 100644 --- a/drivers/mmc/host/sdhci-brcmstb.c +++ b/drivers/mmc/host/sdhci-brcmstb.c @@ -12,6 +12,7 @@ #include #include +#include "sdhci-cqhci.h" #include "sdhci-pltfm.h" #include "cqhci.h" @@ -53,7 +54,7 @@ void brcmstb_reset(struct sdhci_host *host, u8 mask) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_brcmstb_priv *priv = sdhci_pltfm_priv(pltfm_host); - sdhci_reset(host, mask); + sdhci_and_cqhci_reset(host, mask); /* Reset will clear this, so re-enable it */ if (priv->flags & BRCMSTB_PRIV_FLAGS_GATE_CLOCK) From 8382cdf0ab5df84cae9603de927145a7f140b937 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Mon, 7 Dec 2020 11:32:18 +0100 Subject: [PATCH 1034/1477] usb: cdns3: Add support for DRD CDNSP [ Upstream commit db8892bb1bb64b6e3d1381ac342a2ee31e1b76b6 ] Patch adds support for Cadence DRD Super Speed Plus controller(CDNSP). CDNSP DRD is a part of Cadence CDNSP controller. 
The DRD CDNSP controller has a lot of difference on hardware level but on software level is quite compatible with CDNS3 DRD. For this reason CDNS3 DRD part of CDNS3 driver was reused for CDNSP driver. Signed-off-by: Pawel Laszczak Tested-by: Aswath Govindraju Signed-off-by: Peter Chen Stable-dep-of: 9d5333c93134 ("usb: cdns3: host: fix endless superspeed hub port reset") Signed-off-by: Sasha Levin --- drivers/usb/cdns3/core.c | 24 ++++++--- drivers/usb/cdns3/core.h | 5 ++ drivers/usb/cdns3/drd.c | 103 +++++++++++++++++++++++++++------------ drivers/usb/cdns3/drd.h | 67 ++++++++++++++++++++----- 4 files changed, 149 insertions(+), 50 deletions(-) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 6eeb7ed8e91f..8fe7420de033 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -97,13 +97,23 @@ static int cdns3_core_init_role(struct cdns3 *cdns) * can be restricted later depending on strap pin configuration. */ if (dr_mode == USB_DR_MODE_UNKNOWN) { - if (IS_ENABLED(CONFIG_USB_CDNS3_HOST) && - IS_ENABLED(CONFIG_USB_CDNS3_GADGET)) - dr_mode = USB_DR_MODE_OTG; - else if (IS_ENABLED(CONFIG_USB_CDNS3_HOST)) - dr_mode = USB_DR_MODE_HOST; - else if (IS_ENABLED(CONFIG_USB_CDNS3_GADGET)) - dr_mode = USB_DR_MODE_PERIPHERAL; + if (cdns->version == CDNSP_CONTROLLER_V2) { + if (IS_ENABLED(CONFIG_USB_CDNSP_HOST) && + IS_ENABLED(CONFIG_USB_CDNSP_GADGET)) + dr_mode = USB_DR_MODE_OTG; + else if (IS_ENABLED(CONFIG_USB_CDNSP_HOST)) + dr_mode = USB_DR_MODE_HOST; + else if (IS_ENABLED(CONFIG_USB_CDNSP_GADGET)) + dr_mode = USB_DR_MODE_PERIPHERAL; + } else { + if (IS_ENABLED(CONFIG_USB_CDNS3_HOST) && + IS_ENABLED(CONFIG_USB_CDNS3_GADGET)) + dr_mode = USB_DR_MODE_OTG; + else if (IS_ENABLED(CONFIG_USB_CDNS3_HOST)) + dr_mode = USB_DR_MODE_HOST; + else if (IS_ENABLED(CONFIG_USB_CDNS3_GADGET)) + dr_mode = USB_DR_MODE_PERIPHERAL; + } } /* diff --git a/drivers/usb/cdns3/core.h b/drivers/usb/cdns3/core.h index 3176f924293a..0d87871499ea 100644 --- 
a/drivers/usb/cdns3/core.h +++ b/drivers/usb/cdns3/core.h @@ -55,7 +55,9 @@ struct cdns3_platform_data { * @otg_res: the resource for otg * @otg_v0_regs: pointer to base of v0 otg registers * @otg_v1_regs: pointer to base of v1 otg registers + * @otg_cdnsp_regs: pointer to base of CDNSP otg registers * @otg_regs: pointer to base of otg registers + * @otg_irq_regs: pointer to interrupt registers * @otg_irq: irq number for otg controller * @dev_irq: irq number for device controller * @wakeup_irq: irq number for wakeup event, it is optional @@ -86,9 +88,12 @@ struct cdns3 { struct resource otg_res; struct cdns3_otg_legacy_regs *otg_v0_regs; struct cdns3_otg_regs *otg_v1_regs; + struct cdnsp_otg_regs *otg_cdnsp_regs; struct cdns3_otg_common_regs *otg_regs; + struct cdns3_otg_irq_regs *otg_irq_regs; #define CDNS3_CONTROLLER_V0 0 #define CDNS3_CONTROLLER_V1 1 +#define CDNSP_CONTROLLER_V2 2 u32 version; bool phyrst_a_enable; diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c index 38ccd29e4cde..95863d44e3e0 100644 --- a/drivers/usb/cdns3/drd.c +++ b/drivers/usb/cdns3/drd.c @@ -2,13 +2,12 @@ /* * Cadence USBSS DRD Driver. * - * Copyright (C) 2018-2019 Cadence. + * Copyright (C) 2018-2020 Cadence. 
* Copyright (C) 2019 Texas Instruments * * Author: Pawel Laszczak * Roger Quadros * - * */ #include #include @@ -28,8 +27,9 @@ * * Returns 0 on success otherwise negative errno */ -int cdns3_set_mode(struct cdns3 *cdns, enum usb_dr_mode mode) +static int cdns3_set_mode(struct cdns3 *cdns, enum usb_dr_mode mode) { + u32 __iomem *override_reg; u32 reg; switch (mode) { @@ -39,11 +39,24 @@ int cdns3_set_mode(struct cdns3 *cdns, enum usb_dr_mode mode) break; case USB_DR_MODE_OTG: dev_dbg(cdns->dev, "Set controller to OTG mode\n"); - if (cdns->version == CDNS3_CONTROLLER_V1) { - reg = readl(&cdns->otg_v1_regs->override); - reg |= OVERRIDE_IDPULLUP; - writel(reg, &cdns->otg_v1_regs->override); + if (cdns->version == CDNSP_CONTROLLER_V2) + override_reg = &cdns->otg_cdnsp_regs->override; + else if (cdns->version == CDNS3_CONTROLLER_V1) + override_reg = &cdns->otg_v1_regs->override; + else + override_reg = &cdns->otg_v0_regs->ctrl1; + + reg = readl(override_reg); + + if (cdns->version != CDNS3_CONTROLLER_V0) + reg |= OVERRIDE_IDPULLUP; + else + reg |= OVERRIDE_IDPULLUP_V0; + + writel(reg, override_reg); + + if (cdns->version == CDNS3_CONTROLLER_V1) { /* * Enable work around feature built into the * controller to address issue with RX Sensitivity @@ -55,10 +68,6 @@ int cdns3_set_mode(struct cdns3 *cdns, enum usb_dr_mode mode) reg |= PHYRST_CFG_PHYRST_A_ENABLE; writel(reg, &cdns->otg_v1_regs->phyrst_cfg); } - } else { - reg = readl(&cdns->otg_v0_regs->ctrl1); - reg |= OVERRIDE_IDPULLUP_V0; - writel(reg, &cdns->otg_v0_regs->ctrl1); } /* @@ -123,7 +132,7 @@ bool cdns3_is_device(struct cdns3 *cdns) */ static void cdns3_otg_disable_irq(struct cdns3 *cdns) { - writel(0, &cdns->otg_regs->ien); + writel(0, &cdns->otg_irq_regs->ien); } /** @@ -133,7 +142,7 @@ static void cdns3_otg_disable_irq(struct cdns3 *cdns) static void cdns3_otg_enable_irq(struct cdns3 *cdns) { writel(OTGIEN_ID_CHANGE_INT | OTGIEN_VBUSVALID_RISE_INT | - OTGIEN_VBUSVALID_FALL_INT, &cdns->otg_regs->ien); + 
OTGIEN_VBUSVALID_FALL_INT, &cdns->otg_irq_regs->ien); } /** @@ -144,16 +153,21 @@ static void cdns3_otg_enable_irq(struct cdns3 *cdns) */ int cdns3_drd_host_on(struct cdns3 *cdns) { - u32 val; + u32 val, ready_bit; int ret; /* Enable host mode. */ writel(OTGCMD_HOST_BUS_REQ | OTGCMD_OTG_DIS, &cdns->otg_regs->cmd); + if (cdns->version == CDNSP_CONTROLLER_V2) + ready_bit = OTGSTS_CDNSP_XHCI_READY; + else + ready_bit = OTGSTS_CDNS3_XHCI_READY; + dev_dbg(cdns->dev, "Waiting till Host mode is turned on\n"); ret = readl_poll_timeout_atomic(&cdns->otg_regs->sts, val, - val & OTGSTS_XHCI_READY, 1, 100000); + val & ready_bit, 1, 100000); if (ret) dev_err(cdns->dev, "timeout waiting for xhci_ready\n"); @@ -189,17 +203,22 @@ void cdns3_drd_host_off(struct cdns3 *cdns) */ int cdns3_drd_gadget_on(struct cdns3 *cdns) { - int ret, val; u32 reg = OTGCMD_OTG_DIS; + u32 ready_bit; + int ret, val; /* switch OTG core */ writel(OTGCMD_DEV_BUS_REQ | reg, &cdns->otg_regs->cmd); dev_dbg(cdns->dev, "Waiting till Device mode is turned on\n"); + if (cdns->version == CDNSP_CONTROLLER_V2) + ready_bit = OTGSTS_CDNSP_DEV_READY; + else + ready_bit = OTGSTS_CDNS3_DEV_READY; + ret = readl_poll_timeout_atomic(&cdns->otg_regs->sts, val, - val & OTGSTS_DEV_READY, - 1, 100000); + val & ready_bit, 1, 100000); if (ret) { dev_err(cdns->dev, "timeout waiting for dev_ready\n"); return ret; @@ -244,7 +263,7 @@ static int cdns3_init_otg_mode(struct cdns3 *cdns) cdns3_otg_disable_irq(cdns); /* clear all interrupts */ - writel(~0, &cdns->otg_regs->ivect); + writel(~0, &cdns->otg_irq_regs->ivect); ret = cdns3_set_mode(cdns, USB_DR_MODE_OTG); if (ret) @@ -313,7 +332,7 @@ static irqreturn_t cdns3_drd_irq(int irq, void *data) if (cdns->in_lpm) return ret; - reg = readl(&cdns->otg_regs->ivect); + reg = readl(&cdns->otg_irq_regs->ivect); if (!reg) return IRQ_NONE; @@ -332,7 +351,7 @@ static irqreturn_t cdns3_drd_irq(int irq, void *data) ret = IRQ_WAKE_THREAD; } - writel(~0, &cdns->otg_regs->ivect); + writel(~0, 
&cdns->otg_irq_regs->ivect); return ret; } @@ -347,28 +366,43 @@ int cdns3_drd_init(struct cdns3 *cdns) return PTR_ERR(regs); /* Detection of DRD version. Controller has been released - * in two versions. Both are similar, but they have same changes - * in register maps. - * The first register in old version is command register and it's read - * only, so driver should read 0 from it. On the other hand, in v1 - * the first register contains device ID number which is not set to 0. - * Driver uses this fact to detect the proper version of + * in three versions. All are very similar and are software compatible, + * but they have same changes in register maps. + * The first register in oldest version is command register and it's + * read only. Driver should read 0 from it. On the other hand, in v1 + * and v2 the first register contains device ID number which is not + * set to 0. Driver uses this fact to detect the proper version of * controller. */ cdns->otg_v0_regs = regs; if (!readl(&cdns->otg_v0_regs->cmd)) { cdns->version = CDNS3_CONTROLLER_V0; cdns->otg_v1_regs = NULL; + cdns->otg_cdnsp_regs = NULL; cdns->otg_regs = regs; + cdns->otg_irq_regs = (struct cdns3_otg_irq_regs *) + &cdns->otg_v0_regs->ien; writel(1, &cdns->otg_v0_regs->simulate); dev_dbg(cdns->dev, "DRD version v0 (%08x)\n", readl(&cdns->otg_v0_regs->version)); } else { cdns->otg_v0_regs = NULL; cdns->otg_v1_regs = regs; + cdns->otg_cdnsp_regs = regs; + cdns->otg_regs = (void *)&cdns->otg_v1_regs->cmd; - cdns->version = CDNS3_CONTROLLER_V1; - writel(1, &cdns->otg_v1_regs->simulate); + + if (cdns->otg_cdnsp_regs->did == OTG_CDNSP_DID) { + cdns->otg_irq_regs = (struct cdns3_otg_irq_regs *) + &cdns->otg_cdnsp_regs->ien; + cdns->version = CDNSP_CONTROLLER_V2; + } else { + cdns->otg_irq_regs = (struct cdns3_otg_irq_regs *) + &cdns->otg_v1_regs->ien; + writel(1, &cdns->otg_v1_regs->simulate); + cdns->version = CDNS3_CONTROLLER_V1; + } + dev_dbg(cdns->dev, "DRD version v1 (ID: %08x, rev: %08x)\n", 
readl(&cdns->otg_v1_regs->did), readl(&cdns->otg_v1_regs->rid)); @@ -378,10 +412,17 @@ int cdns3_drd_init(struct cdns3 *cdns) /* Update dr_mode according to STRAP configuration. */ cdns->dr_mode = USB_DR_MODE_OTG; - if (state == OTGSTS_STRAP_HOST) { + + if ((cdns->version == CDNSP_CONTROLLER_V2 && + state == OTGSTS_CDNSP_STRAP_HOST) || + (cdns->version != CDNSP_CONTROLLER_V2 && + state == OTGSTS_STRAP_HOST)) { dev_dbg(cdns->dev, "Controller strapped to HOST\n"); cdns->dr_mode = USB_DR_MODE_HOST; - } else if (state == OTGSTS_STRAP_GADGET) { + } else if ((cdns->version == CDNSP_CONTROLLER_V2 && + state == OTGSTS_CDNSP_STRAP_GADGET) || + (cdns->version != CDNSP_CONTROLLER_V2 && + state == OTGSTS_STRAP_GADGET)) { dev_dbg(cdns->dev, "Controller strapped to PERIPHERAL\n"); cdns->dr_mode = USB_DR_MODE_PERIPHERAL; } diff --git a/drivers/usb/cdns3/drd.h b/drivers/usb/cdns3/drd.h index f1ccae285a16..a767b6893938 100644 --- a/drivers/usb/cdns3/drd.h +++ b/drivers/usb/cdns3/drd.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Cadence USB3 DRD header file. + * Cadence USB3 and USBSSP DRD header file. * - * Copyright (C) 2018-2019 Cadence. + * Copyright (C) 2018-2020 Cadence. * * Author: Pawel Laszczak */ @@ -13,7 +13,7 @@ #include #include "core.h" -/* DRD register interface for version v1. */ +/* DRD register interface for version v1 of cdns3 driver. */ struct cdns3_otg_regs { __le32 did; __le32 rid; @@ -38,7 +38,7 @@ struct cdns3_otg_regs { __le32 ctrl2; }; -/* DRD register interface for version v0. */ +/* DRD register interface for version v0 of cdns3 driver. 
*/ struct cdns3_otg_legacy_regs { __le32 cmd; __le32 sts; @@ -57,14 +57,45 @@ struct cdns3_otg_legacy_regs { __le32 ctrl1; }; +/* DRD register interface for cdnsp driver */ +struct cdnsp_otg_regs { + __le32 did; + __le32 rid; + __le32 cfgs1; + __le32 cfgs2; + __le32 cmd; + __le32 sts; + __le32 state; + __le32 ien; + __le32 ivect; + __le32 tmr; + __le32 simulate; + __le32 adpbc_sts; + __le32 adp_ramp_time; + __le32 adpbc_ctrl1; + __le32 adpbc_ctrl2; + __le32 override; + __le32 vbusvalid_dbnc_cfg; + __le32 sessvalid_dbnc_cfg; + __le32 susp_timing_ctrl; +}; + +#define OTG_CDNSP_DID 0x0004034E + /* - * Common registers interface for both version of DRD. + * Common registers interface for both CDNS3 and CDNSP version of DRD. */ struct cdns3_otg_common_regs { __le32 cmd; __le32 sts; __le32 state; - __le32 different1; +}; + +/* + * Interrupt related registers. This registers are mapped in different + * location for CDNSP controller. + */ +struct cdns3_otg_irq_regs { __le32 ien; __le32 ivect; }; @@ -92,9 +123,9 @@ struct cdns3_otg_common_regs { #define OTGCMD_DEV_BUS_DROP BIT(8) /* Drop the bus for Host mode*/ #define OTGCMD_HOST_BUS_DROP BIT(9) -/* Power Down USBSS-DEV. */ +/* Power Down USBSS-DEV - only for CDNS3.*/ #define OTGCMD_DEV_POWER_OFF BIT(11) -/* Power Down CDNSXHCI. */ +/* Power Down CDNSXHCI - only for CDNS3. */ #define OTGCMD_HOST_POWER_OFF BIT(12) /* OTGIEN - bitmasks */ @@ -123,20 +154,31 @@ struct cdns3_otg_common_regs { #define OTGSTS_OTG_NRDY_MASK BIT(11) #define OTGSTS_OTG_NRDY(p) ((p) & OTGSTS_OTG_NRDY_MASK) /* - * Value of the strap pins. + * Value of the strap pins for: + * CDNS3: * 000 - no default configuration * 010 - Controller initiall configured as Host * 100 - Controller initially configured as Device + * CDNSP: + * 000 - No default configuration. + * 010 - Controller initiall configured as Host. + * 100 - Controller initially configured as Device. 
*/ #define OTGSTS_STRAP(p) (((p) & GENMASK(14, 12)) >> 12) #define OTGSTS_STRAP_NO_DEFAULT_CFG 0x00 #define OTGSTS_STRAP_HOST_OTG 0x01 #define OTGSTS_STRAP_HOST 0x02 #define OTGSTS_STRAP_GADGET 0x04 +#define OTGSTS_CDNSP_STRAP_HOST 0x01 +#define OTGSTS_CDNSP_STRAP_GADGET 0x02 + /* Host mode is turned on. */ -#define OTGSTS_XHCI_READY BIT(26) +#define OTGSTS_CDNS3_XHCI_READY BIT(26) +#define OTGSTS_CDNSP_XHCI_READY BIT(27) + /* "Device mode is turned on .*/ -#define OTGSTS_DEV_READY BIT(27) +#define OTGSTS_CDNS3_DEV_READY BIT(27) +#define OTGSTS_CDNSP_DEV_READY BIT(26) /* OTGSTATE- bitmasks */ #define OTGSTATE_DEV_STATE_MASK GENMASK(2, 0) @@ -152,6 +194,8 @@ struct cdns3_otg_common_regs { #define OVERRIDE_IDPULLUP BIT(0) /* Only for CDNS3_CONTROLLER_V0 version */ #define OVERRIDE_IDPULLUP_V0 BIT(24) +/* Vbusvalid/Sesvalid override select. */ +#define OVERRIDE_SESS_VLD_SEL BIT(10) /* PHYRST_CFG - bitmasks */ #define PHYRST_CFG_PHYRST_A_ENABLE BIT(0) @@ -170,6 +214,5 @@ int cdns3_drd_gadget_on(struct cdns3 *cdns); void cdns3_drd_gadget_off(struct cdns3 *cdns); int cdns3_drd_host_on(struct cdns3 *cdns); void cdns3_drd_host_off(struct cdns3 *cdns); -int cdns3_set_mode(struct cdns3 *cdns, enum usb_dr_mode mode); #endif /* __LINUX_CDNS3_DRD */ From 9ac038d3c2f2032d0849f2354236ee1603da034b Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 5 Jul 2021 09:22:54 +0800 Subject: [PATCH 1035/1477] ceph: make ceph_create_session_msg a global symbol [ Upstream commit fba97e8025015b63b1bdb73cd868c8ea832a1620 ] Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Stable-dep-of: 5bd76b8de5b7 ("ceph: fix NULL pointer dereference for req->r_session") Signed-off-by: Sasha Levin --- fs/ceph/mds_client.c | 16 +++++++++------- fs/ceph/mds_client.h | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6859967df2b1..36cf3638f501 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1157,7 
+1157,7 @@ random: /* * session messages */ -static struct ceph_msg *create_session_msg(u32 op, u64 seq) +struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq) { struct ceph_msg *msg; struct ceph_mds_session_head *h; @@ -1165,7 +1165,8 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS, false); if (!msg) { - pr_err("create_session_msg ENOMEM creating msg\n"); + pr_err("ENOMEM creating session %s msg\n", + ceph_session_op_name(op)); return NULL; } h = msg->front.iov_base; @@ -1299,7 +1300,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); if (!msg) { - pr_err("create_session_msg ENOMEM creating msg\n"); + pr_err("ENOMEM creating session open msg\n"); return ERR_PTR(-ENOMEM); } p = msg->front.iov_base; @@ -1833,8 +1834,8 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, dout("send_renew_caps to mds%d (%s)\n", session->s_mds, ceph_mds_state_name(state)); - msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, - ++session->s_renew_seq); + msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, + ++session->s_renew_seq); if (!msg) return -ENOMEM; ceph_con_send(&session->s_con, msg); @@ -1848,7 +1849,7 @@ static int send_flushmsg_ack(struct ceph_mds_client *mdsc, dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n", session->s_mds, ceph_session_state_name(session->s_state), seq); - msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq); + msg = ceph_create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq); if (!msg) return -ENOMEM; ceph_con_send(&session->s_con, msg); @@ -1900,7 +1901,8 @@ static int request_close_session(struct ceph_mds_session *session) dout("request_close_session mds%d state %s seq %lld\n", session->s_mds, ceph_session_state_name(session->s_state), session->s_seq); - msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); + 
msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_CLOSE, + session->s_seq); if (!msg) return -ENOMEM; ceph_con_send(&session->s_con, msg); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index acf33d7192bb..c0cff765cbf5 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -518,6 +518,7 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) kref_put(&req->r_kref, ceph_mdsc_release_request); } +extern struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq); extern void __ceph_queue_cap_release(struct ceph_mds_session *session, struct ceph_cap *cap); extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, From d94ba7b3b7e78c6b5ceac35dff0e0a53a2b1a228 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 5 Jul 2021 09:22:55 +0800 Subject: [PATCH 1036/1477] ceph: make iterate_sessions a global symbol [ Upstream commit 59b312f36230ea91ebb6ce1b11f2781604495d30 ] Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Stable-dep-of: 5bd76b8de5b7 ("ceph: fix NULL pointer dereference for req->r_session") Signed-off-by: Sasha Levin --- fs/ceph/caps.c | 26 +---------------------- fs/ceph/mds_client.c | 49 +++++++++++++++++++++++++++++--------------- fs/ceph/mds_client.h | 3 +++ 3 files changed, 36 insertions(+), 42 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 76e43a487bc6..7ae27a18cf18 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4310,33 +4310,9 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s) dout("flush_dirty_caps done\n"); } -static void iterate_sessions(struct ceph_mds_client *mdsc, - void (*cb)(struct ceph_mds_session *)) -{ - int mds; - - mutex_lock(&mdsc->mutex); - for (mds = 0; mds < mdsc->max_sessions; ++mds) { - struct ceph_mds_session *s; - - if (!mdsc->sessions[mds]) - continue; - - s = ceph_get_mds_session(mdsc->sessions[mds]); - if (!s) - continue; - - mutex_unlock(&mdsc->mutex); - cb(s); - ceph_put_mds_session(s); - mutex_lock(&mdsc->mutex); - } - 
mutex_unlock(&mdsc->mutex); -} - void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) { - iterate_sessions(mdsc, flush_dirty_session_caps); + ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true); } void __ceph_touch_fmode(struct ceph_inode_info *ci, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 36cf3638f501..45587b3025e4 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -809,6 +809,33 @@ static void put_request_session(struct ceph_mds_request *req) } } +void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc, + void (*cb)(struct ceph_mds_session *), + bool check_state) +{ + int mds; + + mutex_lock(&mdsc->mutex); + for (mds = 0; mds < mdsc->max_sessions; ++mds) { + struct ceph_mds_session *s; + + s = __ceph_lookup_mds_session(mdsc, mds); + if (!s) + continue; + + if (check_state && !check_session_state(s)) { + ceph_put_mds_session(s); + continue; + } + + mutex_unlock(&mdsc->mutex); + cb(s); + ceph_put_mds_session(s); + mutex_lock(&mdsc->mutex); + } + mutex_unlock(&mdsc->mutex); +} + void ceph_mdsc_release_request(struct kref *kref) { struct ceph_mds_request *req = container_of(kref, @@ -4377,24 +4404,12 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, } /* - * lock unlock sessions, to wait ongoing session activities + * lock unlock the session, to wait ongoing session activities */ -static void lock_unlock_sessions(struct ceph_mds_client *mdsc) +static void lock_unlock_session(struct ceph_mds_session *s) { - int i; - - mutex_lock(&mdsc->mutex); - for (i = 0; i < mdsc->max_sessions; i++) { - struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i); - if (!s) - continue; - mutex_unlock(&mdsc->mutex); - mutex_lock(&s->s_mutex); - mutex_unlock(&s->s_mutex); - ceph_put_mds_session(s); - mutex_lock(&mdsc->mutex); - } - mutex_unlock(&mdsc->mutex); + mutex_lock(&s->s_mutex); + mutex_unlock(&s->s_mutex); } static void maybe_recover_session(struct ceph_mds_client *mdsc) @@ -4658,7 +4673,7 @@ void 
ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) dout("pre_umount\n"); mdsc->stopping = 1; - lock_unlock_sessions(mdsc); + ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false); ceph_flush_dirty_caps(mdsc); wait_requests(mdsc); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index c0cff765cbf5..88fc80832016 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -518,6 +518,9 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) kref_put(&req->r_kref, ceph_mdsc_release_request); } +extern void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc, + void (*cb)(struct ceph_mds_session *), + bool check_state); extern struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq); extern void __ceph_queue_cap_release(struct ceph_mds_session *session, struct ceph_cap *cap); From 78b2f546f789d33ac951921adb61873462a74025 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 5 Jul 2021 09:22:56 +0800 Subject: [PATCH 1037/1477] ceph: flush mdlog before umounting [ Upstream commit d095559ce4100f0c02aea229705230deac329c97 ] Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Stable-dep-of: 5bd76b8de5b7 ("ceph: fix NULL pointer dereference for req->r_session") Signed-off-by: Sasha Levin --- fs/ceph/mds_client.c | 25 +++++++++++++++++++++++++ fs/ceph/mds_client.h | 1 + fs/ceph/strings.c | 1 + include/linux/ceph/ceph_fs.h | 1 + 4 files changed, 28 insertions(+) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 45587b3025e4..fa51872ff850 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4664,6 +4664,30 @@ static void wait_requests(struct ceph_mds_client *mdsc) dout("wait_requests done\n"); } +void send_flush_mdlog(struct ceph_mds_session *s) +{ + struct ceph_msg *msg; + + /* + * Pre-luminous MDS crashes when it sees an unknown session request + */ + if (!CEPH_HAVE_FEATURE(s->s_con.peer_features, SERVER_LUMINOUS)) + return; + + mutex_lock(&s->s_mutex); + dout("request mdlog flush to mds%d 
(%s)s seq %lld\n", s->s_mds, + ceph_session_state_name(s->s_state), s->s_seq); + msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG, + s->s_seq); + if (!msg) { + pr_err("failed to request mdlog flush to mds%d (%s) seq %lld\n", + s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); + } else { + ceph_con_send(&s->s_con, msg); + } + mutex_unlock(&s->s_mutex); +} + /* * called before mount is ro, and before dentries are torn down. * (hmm, does this still race with new lookups?) @@ -4673,6 +4697,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) dout("pre_umount\n"); mdsc->stopping = 1; + ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true); ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false); ceph_flush_dirty_caps(mdsc); wait_requests(mdsc); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 88fc80832016..a92e42e8a9f8 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -518,6 +518,7 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) kref_put(&req->r_kref, ceph_mdsc_release_request); } +extern void send_flush_mdlog(struct ceph_mds_session *s); extern void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc, void (*cb)(struct ceph_mds_session *), bool check_state); diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index 4a79f3632260..573bb9556fb5 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c @@ -46,6 +46,7 @@ const char *ceph_session_op_name(int op) case CEPH_SESSION_FLUSHMSG_ACK: return "flushmsg_ack"; case CEPH_SESSION_FORCE_RO: return "force_ro"; case CEPH_SESSION_REJECT: return "reject"; + case CEPH_SESSION_REQUEST_FLUSH_MDLOG: return "flush_mdlog"; } return "???"; } diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 455e9b9e2adf..8287382d3d1d 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -288,6 +288,7 @@ enum { CEPH_SESSION_FLUSHMSG_ACK, CEPH_SESSION_FORCE_RO, CEPH_SESSION_REJECT, + 
CEPH_SESSION_REQUEST_FLUSH_MDLOG, }; extern const char *ceph_session_op_name(int op); From 8a31ae7f77943b390f8d2002751c3a6e136b0bad Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 5 Jul 2021 09:22:57 +0800 Subject: [PATCH 1038/1477] ceph: flush the mdlog before waiting on unsafe reqs [ Upstream commit e1a4541ec0b951685a49d1f72d183681e6433a45 ] For the client requests that will have both unsafe and safe replies from MDS daemons, on the MDS side the daemons won't flush the mdlog (journal log) immediately, because they think it's unnecessary. That's true for most cases but not all, like the fsync request. The fsync will wait until all the unsafe replied requests have been safely replied. Normally, if there are multiple threads or clients running, the whole mdlog in the MDS daemons can be flushed in time if any request triggers the mdlog submit thread. So usually we won't see normal operations get stuck for a long time. But in case there is only one client with only one thread running, the stall may be obvious, and in the worst case it must wait up to 5 seconds for the mdlog to be flushed periodically by the MDS's tick thread. This patch triggers a flush of the mdlog in the relevant and auth MDSes to which the in-flight requests are sent, just before waiting for the unsafe requests to finish.
Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Stable-dep-of: 5bd76b8de5b7 ("ceph: fix NULL pointer dereference for req->r_session") Signed-off-by: Sasha Levin --- fs/ceph/caps.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7ae27a18cf18..2fa6b7cc0cc4 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2294,6 +2294,7 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid) */ static int unsafe_request_wait(struct inode *inode) { + struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; int ret, err = 0; @@ -2313,6 +2314,81 @@ static int unsafe_request_wait(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock); + /* + * Trigger to flush the journal logs in all the relevant MDSes + * manually, or in the worst case we must wait at most 5 seconds + * to wait the journal logs to be flushed by the MDSes periodically. + */ + if (req1 || req2) { + struct ceph_mds_session **sessions = NULL; + struct ceph_mds_session *s; + struct ceph_mds_request *req; + unsigned int max; + int i; + + /* + * The mdsc->max_sessions is unlikely to be changed + * mostly, here we will retry it by reallocating the + * sessions arrary memory to get rid of the mdsc->mutex + * lock. 
+ */ +retry: + max = mdsc->max_sessions; + sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO); + if (!sessions) + return -ENOMEM; + + spin_lock(&ci->i_unsafe_lock); + if (req1) { + list_for_each_entry(req, &ci->i_unsafe_dirops, + r_unsafe_dir_item) { + s = req->r_session; + if (unlikely(s->s_mds > max)) { + spin_unlock(&ci->i_unsafe_lock); + goto retry; + } + if (!sessions[s->s_mds]) { + s = ceph_get_mds_session(s); + sessions[s->s_mds] = s; + } + } + } + if (req2) { + list_for_each_entry(req, &ci->i_unsafe_iops, + r_unsafe_target_item) { + s = req->r_session; + if (unlikely(s->s_mds > max)) { + spin_unlock(&ci->i_unsafe_lock); + goto retry; + } + if (!sessions[s->s_mds]) { + s = ceph_get_mds_session(s); + sessions[s->s_mds] = s; + } + } + } + spin_unlock(&ci->i_unsafe_lock); + + /* the auth MDS */ + spin_lock(&ci->i_ceph_lock); + if (ci->i_auth_cap) { + s = ci->i_auth_cap->session; + if (!sessions[s->s_mds]) + sessions[s->s_mds] = ceph_get_mds_session(s); + } + spin_unlock(&ci->i_ceph_lock); + + /* send flush mdlog request to MDSes */ + for (i = 0; i < max; i++) { + s = sessions[i]; + if (s) { + send_flush_mdlog(s); + ceph_put_mds_session(s); + } + } + kfree(sessions); + } + dout("unsafe_request_wait %p wait on tid %llu %llu\n", inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL); if (req1) { From 38993788f40c78c64ff68aa68877ca6cbeac05a1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 6 Sep 2021 12:43:01 +0300 Subject: [PATCH 1039/1477] ceph: fix off by one bugs in unsafe_request_wait() [ Upstream commit 708c87168b6121abc74b2a57d0c498baaf70cbea ] The "> max" tests should be ">= max" to prevent an out of bounds access on the next lines. 
Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs") Signed-off-by: Dan Carpenter Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov Stable-dep-of: 5bd76b8de5b7 ("ceph: fix NULL pointer dereference for req->r_session") Signed-off-by: Sasha Levin --- fs/ceph/caps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 2fa6b7cc0cc4..f14d52848b91 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2343,7 +2343,7 @@ retry: list_for_each_entry(req, &ci->i_unsafe_dirops, r_unsafe_dir_item) { s = req->r_session; - if (unlikely(s->s_mds > max)) { + if (unlikely(s->s_mds >= max)) { spin_unlock(&ci->i_unsafe_lock); goto retry; } @@ -2357,7 +2357,7 @@ retry: list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; - if (unlikely(s->s_mds > max)) { + if (unlikely(s->s_mds >= max)) { spin_unlock(&ci->i_unsafe_lock); goto retry; } From 8e137ace53339eae14ced15143f95194a14999fd Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 12 Jan 2022 12:29:04 +0800 Subject: [PATCH 1040/1477] ceph: put the requests/sessions when it fails to alloc memory [ Upstream commit 89d43d0551a848e70e63d9ba11534aaeabc82443 ] When failing to allocate the sessions memory we should make sure that req1, req2 and the sessions get put. Also, if max_sessions has decreased, then when reallocating the new memory with krealloc some sessions may be missed and never put. And if max_sessions is 0, krealloc will return ZERO_SIZE_PTR, which will lead to a distinct access fault.
URL: https://tracker.ceph.com/issues/53819 Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs") Signed-off-by: Xiubo Li Reviewed-by: Venky Shankar Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Stable-dep-of: 5bd76b8de5b7 ("ceph: fix NULL pointer dereference for req->r_session") Signed-off-by: Sasha Levin --- fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f14d52848b91..4e2fada35808 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2297,6 +2297,7 @@ static int unsafe_request_wait(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; + unsigned int max_sessions; int ret, err = 0; spin_lock(&ci->i_unsafe_lock); @@ -2314,37 +2315,45 @@ static int unsafe_request_wait(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock); + /* + * The mdsc->max_sessions is unlikely to be changed + * mostly, here we will retry it by reallocating the + * sessions array memory to get rid of the mdsc->mutex + * lock. + */ +retry: + max_sessions = mdsc->max_sessions; + /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */ - if (req1 || req2) { + if ((req1 || req2) && likely(max_sessions)) { struct ceph_mds_session **sessions = NULL; struct ceph_mds_session *s; struct ceph_mds_request *req; - unsigned int max; int i; - /* - * The mdsc->max_sessions is unlikely to be changed - * mostly, here we will retry it by reallocating the - * sessions arrary memory to get rid of the mdsc->mutex - * lock. 
- */ -retry: - max = mdsc->max_sessions; - sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO); - if (!sessions) - return -ENOMEM; + sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL); + if (!sessions) { + err = -ENOMEM; + goto out; + } spin_lock(&ci->i_unsafe_lock); if (req1) { list_for_each_entry(req, &ci->i_unsafe_dirops, r_unsafe_dir_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { + if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); + for (i = 0; i < max_sessions; i++) { + s = sessions[i]; + if (s) + ceph_put_mds_session(s); + } + kfree(sessions); goto retry; } if (!sessions[s->s_mds]) { @@ -2357,8 +2366,14 @@ retry: list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { + if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); + for (i = 0; i < max_sessions; i++) { + s = sessions[i]; + if (s) + ceph_put_mds_session(s); + } + kfree(sessions); goto retry; } if (!sessions[s->s_mds]) { @@ -2379,7 +2394,7 @@ retry: spin_unlock(&ci->i_ceph_lock); /* send flush mdlog request to MDSes */ - for (i = 0; i < max; i++) { + for (i = 0; i < max_sessions; i++) { s = sessions[i]; if (s) { send_flush_mdlog(s); @@ -2396,15 +2411,19 @@ retry: ceph_timeout_jiffies(req1->r_timeout)); if (ret) err = -EIO; - ceph_mdsc_put_request(req1); } if (req2) { ret = !wait_for_completion_timeout(&req2->r_safe_completion, ceph_timeout_jiffies(req2->r_timeout)); if (ret) err = -EIO; - ceph_mdsc_put_request(req2); } + +out: + if (req1) + ceph_mdsc_put_request(req1); + if (req2) + ceph_mdsc_put_request(req2); return err; } From 69263bf781bef3841f22815b834a5243474aa6d4 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 14 Apr 2022 09:07:21 +0800 Subject: [PATCH 1041/1477] ceph: fix possible NULL pointer dereference for req->r_session [ Upstream commit 7acae6183cf37c48b8da48bbbdb78820fb3913f3 ] The request will be inserted into the ci->i_unsafe_dirops before 
assigning the req->r_session, so it's possible that we will hit NULL pointer dereference bug here. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/55327 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Tested-by: Aaron Tomlin Signed-off-by: Ilya Dryomov Stable-dep-of: 5bd76b8de5b7 ("ceph: fix NULL pointer dereference for req->r_session") Signed-off-by: Sasha Levin --- fs/ceph/caps.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 4e2fada35808..ce6a858e765a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2346,6 +2346,8 @@ retry: list_for_each_entry(req, &ci->i_unsafe_dirops, r_unsafe_dir_item) { s = req->r_session; + if (!s) + continue; if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); for (i = 0; i < max_sessions; i++) { @@ -2366,6 +2368,8 @@ retry: list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; + if (!s) + continue; if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); for (i = 0; i < max_sessions; i++) { From 00c004c070f22a47063de4ff2a5638fce1f94d95 Mon Sep 17 00:00:00 2001 From: Kenneth Lee Date: Thu, 18 Aug 2022 22:42:55 -0700 Subject: [PATCH 1042/1477] ceph: Use kcalloc for allocating multiple elements [ Upstream commit aa1d627207cace003163dee24d1c06fa4e910c6b ] Prefer using kcalloc(a, b) over kzalloc(a * b) as this improves semantics since kcalloc is intended for allocating an array of memory. 
Signed-off-by: Kenneth Lee Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov Stable-dep-of: 5bd76b8de5b7 ("ceph: fix NULL pointer dereference for req->r_session") Signed-off-by: Sasha Levin --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ce6a858e765a..668be87ffee6 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2335,7 +2335,7 @@ retry: struct ceph_mds_request *req; int i; - sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL); + sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); if (!sessions) { err = -ENOMEM; goto out; From ca3a08e9d9ebda0557c3a9d316e768f4d52c2168 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 10 Nov 2022 21:01:59 +0800 Subject: [PATCH 1043/1477] ceph: fix NULL pointer dereference for req->r_session [ Upstream commit 5bd76b8de5b74fa941a6eafee87728a0fe072267 ] The request's r_session may be changed when it is forwarded or resent. In both the forwarding and resending cases the requests will be protected by the mdsc->mutex.
Cc: stable@vger.kernel.org Link: https://bugzilla.redhat.com/show_bug.cgi?id=2137955 Signed-off-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/caps.c | 48 ++++++++++++------------------------------------ 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 668be87ffee6..51562d36fa83 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2297,7 +2297,6 @@ static int unsafe_request_wait(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; - unsigned int max_sessions; int ret, err = 0; spin_lock(&ci->i_unsafe_lock); @@ -2315,28 +2314,24 @@ static int unsafe_request_wait(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock); - /* - * The mdsc->max_sessions is unlikely to be changed - * mostly, here we will retry it by reallocating the - * sessions array memory to get rid of the mdsc->mutex - * lock. - */ -retry: - max_sessions = mdsc->max_sessions; - /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. 
*/ - if ((req1 || req2) && likely(max_sessions)) { - struct ceph_mds_session **sessions = NULL; - struct ceph_mds_session *s; + if (req1 || req2) { struct ceph_mds_request *req; + struct ceph_mds_session **sessions; + struct ceph_mds_session *s; + unsigned int max_sessions; int i; + mutex_lock(&mdsc->mutex); + max_sessions = mdsc->max_sessions; + sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); if (!sessions) { + mutex_unlock(&mdsc->mutex); err = -ENOMEM; goto out; } @@ -2348,16 +2343,6 @@ retry: s = req->r_session; if (!s) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2370,16 +2355,6 @@ retry: s = req->r_session; if (!s) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2391,11 +2366,12 @@ retry: /* the auth MDS */ spin_lock(&ci->i_ceph_lock); if (ci->i_auth_cap) { - s = ci->i_auth_cap->session; - if (!sessions[s->s_mds]) - sessions[s->s_mds] = ceph_get_mds_session(s); + s = ci->i_auth_cap->session; + if (!sessions[s->s_mds]) + sessions[s->s_mds] = ceph_get_mds_session(s); } spin_unlock(&ci->i_ceph_lock); + mutex_unlock(&mdsc->mutex); /* send flush mdlog request to MDSes */ for (i = 0; i < max_sessions; i++) { From a32635528d6552cb729968af2db09e64c4ac6f24 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 20 Jul 2022 23:35:23 +0200 Subject: [PATCH 1044/1477] usb: dwc3: gadget: conditionally remove requests [ Upstream commit b44c0e7fef51ee7e8ca8c6efbf706f5613787100 ] The functions stop_active_transfers and ep_disable are both calling 
remove_requests. This functions in both cases will giveback the requests with status ESHUTDOWN, which also represents an physical disconnection. For ep_disable this is not true. This patch adds the status parameter to remove_requests and sets the status to ECONNRESET on ep_disable. Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20220720213523.1055897-1-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman Stable-dep-of: f90f5afd5083 ("usb: dwc3: gadget: Clear ep descriptor last") Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 347ba7e4bd81..c753d889ae1c 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -752,7 +752,7 @@ out: return 0; } -static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep) +static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep, int status) { struct dwc3_request *req; @@ -762,19 +762,19 @@ static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep) while (!list_empty(&dep->started_list)) { req = next_request(&dep->started_list); - dwc3_gadget_giveback(dep, req, -ESHUTDOWN); + dwc3_gadget_giveback(dep, req, status); } while (!list_empty(&dep->pending_list)) { req = next_request(&dep->pending_list); - dwc3_gadget_giveback(dep, req, -ESHUTDOWN); + dwc3_gadget_giveback(dep, req, status); } while (!list_empty(&dep->cancelled_list)) { req = next_request(&dep->cancelled_list); - dwc3_gadget_giveback(dep, req, -ESHUTDOWN); + dwc3_gadget_giveback(dep, req, status); } } @@ -809,7 +809,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) dep->endpoint.desc = NULL; } - dwc3_remove_requests(dwc, dep); + dwc3_remove_requests(dwc, dep, -ECONNRESET); dep->stream_capable = false; dep->type = 0; @@ -2067,7 +2067,7 @@ static void dwc3_stop_active_transfers(struct dwc3 *dwc) if (!dep) continue; - 
dwc3_remove_requests(dwc, dep); + dwc3_remove_requests(dwc, dep, -ESHUTDOWN); } } From cff7523ab8b87e958c31a52ac7ac31112de269c8 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 7 Nov 2022 18:45:44 -0800 Subject: [PATCH 1045/1477] usb: dwc3: gadget: Return -ESHUTDOWN on ep disable [ Upstream commit ffb9da4a04c69567bad717707b6fdfbc4c216ef4 ] The usb_request API clearly noted that removed requests due to disabled endpoint should have -ESHUTDOWN status returned. Don't change this behavior. Fixes: b44c0e7fef51 ("usb: dwc3: gadget: conditionally remove requests") Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/3421859485cb32d77e2068549679a6c07a7797bc.1667875427.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: f90f5afd5083 ("usb: dwc3: gadget: Clear ep descriptor last") Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c753d889ae1c..2b4e1c0d02d5 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -809,7 +809,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) dep->endpoint.desc = NULL; } - dwc3_remove_requests(dwc, dep, -ECONNRESET); + dwc3_remove_requests(dwc, dep, -ESHUTDOWN); dep->stream_capable = false; dep->type = 0; From f06b7e6a77c177d30fe3e2d64ccc02daf59b51f8 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 15 Nov 2022 17:19:43 -0800 Subject: [PATCH 1046/1477] usb: dwc3: gadget: Clear ep descriptor last [ Upstream commit f90f5afd5083a7cb4aee13bd4cc0ae600bd381ca ] Until the endpoint is disabled, its descriptors should remain valid. When its requests are removed from ep disable, the request completion routine may attempt to access the endpoint's descriptor. Don't clear the descriptors before that. 
Fixes: f09ddcfcb8c5 ("usb: dwc3: gadget: Prevent EP queuing while stopping transfers") Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/45db7c83b209259115bf652af210f8b2b3b1a383.1668561364.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 2b4e1c0d02d5..a9a43d649478 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -803,18 +803,18 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) reg &= ~DWC3_DALEPENA_EP(dep->number); dwc3_writel(dwc->regs, DWC3_DALEPENA, reg); - /* Clear out the ep descriptors for non-ep0 */ - if (dep->number > 1) { - dep->endpoint.comp_desc = NULL; - dep->endpoint.desc = NULL; - } - dwc3_remove_requests(dwc, dep, -ESHUTDOWN); dep->stream_capable = false; dep->type = 0; dep->flags = 0; + /* Clear out the ep descriptors for non-ep0 */ + if (dep->number > 1) { + dep->endpoint.comp_desc = NULL; + dep->endpoint.desc = NULL; + } + return 0; } From e7f21d10e93e9fb3a724933e7a3cb4bb1b6f03a4 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Fri, 18 Nov 2022 14:33:04 +0800 Subject: [PATCH 1047/1477] nilfs2: fix nilfs_sufile_mark_dirty() not set segment usage as dirty commit 512c5ca01a3610ab14ff6309db363de51f1c13a6 upstream. When extending segments, nilfs_sufile_alloc() is called to get an unassigned segment, then mark it as dirty to avoid accidentally allocating the same segment in the future. But for some special cases such as a corrupted image it can be unreliable. If such corruption of the dirty state of the segment occurs, nilfs2 may reallocate a segment that is in use and pick the same segment for writing twice at the same time. 
This will cause the problem reported by syzkaller: https://syzkaller.appspot.com/bug?id=c7c4748e11ffcc367cef04f76e02e931833cbd24 This case started with segbuf1.segnum = 3, nextnum = 4 when constructed. It supposed segment 4 has already been allocated and marked as dirty. However the dirty state was corrupted and segment 4 usage was not dirty. For the first time nilfs_segctor_extend_segments() segment 4 was allocated again, which made segbuf2 and next segbuf3 had same segment 4. sb_getblk() will get same bh for segbuf2 and segbuf3, and this bh is added to both buffer lists of two segbuf. It makes the lists broken which causes NULL pointer dereference. Fix the problem by setting usage as dirty every time in nilfs_sufile_mark_dirty(), which is called during constructing current segment to be written out and before allocating next segment. [chenzhongjin@huawei.com: add lock protection per Ryusuke] Link: https://lkml.kernel.org/r/20221121091141.214703-1-chenzhongjin@huawei.com Link: https://lkml.kernel.org/r/20221118063304.140187-1-chenzhongjin@huawei.com Fixes: 9ff05123e3bf ("nilfs2: segment constructor") Signed-off-by: Chen Zhongjin Reported-by: Reported-by: Liu Shixin Acked-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/sufile.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 63722475e17e..51f4cb060231 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; + void *kaddr; + struct nilfs_segment_usage *su; int ret; + down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (!ret) { mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); + kaddr = kmap_atomic(bh->b_page); + su = 
nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + nilfs_segment_usage_set_dirty(su); + kunmap_atomic(kaddr); brelse(bh); } + up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } From a4a62a23fadc0f79d867dcf4d9dc96d80ae04c18 Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Thu, 10 Nov 2022 00:31:37 +0530 Subject: [PATCH 1048/1477] gcov: clang: fix the buffer overflow issue commit a6f810efabfd789d3bbafeacb4502958ec56c5ce upstream. Currently, in clang version of gcov code when module is getting removed gcov_info_add() incorrectly adds the sfn_ptr->counter to all the dst->functions and it result in the kernel panic in below crash report. Fix this by properly handling it. [ 8.899094][ T599] Unable to handle kernel write to read-only memory at virtual address ffffff80461cc000 [ 8.899100][ T599] Mem abort info: [ 8.899102][ T599] ESR = 0x9600004f [ 8.899103][ T599] EC = 0x25: DABT (current EL), IL = 32 bits [ 8.899105][ T599] SET = 0, FnV = 0 [ 8.899107][ T599] EA = 0, S1PTW = 0 [ 8.899108][ T599] FSC = 0x0f: level 3 permission fault [ 8.899110][ T599] Data abort info: [ 8.899111][ T599] ISV = 0, ISS = 0x0000004f [ 8.899113][ T599] CM = 0, WnR = 1 [ 8.899114][ T599] swapper pgtable: 4k pages, 39-bit VAs, pgdp=00000000ab8de000 [ 8.899116][ T599] [ffffff80461cc000] pgd=18000009ffcde003, p4d=18000009ffcde003, pud=18000009ffcde003, pmd=18000009ffcad003, pte=00600000c61cc787 [ 8.899124][ T599] Internal error: Oops: 9600004f [#1] PREEMPT SMP [ 8.899265][ T599] Skip md ftrace buffer dump for: 0x1609e0 .... 
.., [ 8.899544][ T599] CPU: 7 PID: 599 Comm: modprobe Tainted: G S OE 5.15.41-android13-8-g38e9b1af6bce #1 [ 8.899547][ T599] Hardware name: XXX (DT) [ 8.899549][ T599] pstate: 82400005 (Nzcv daif +PAN -UAO +TCO -DIT -SSBS BTYPE=--) [ 8.899551][ T599] pc : gcov_info_add+0x9c/0xb8 [ 8.899557][ T599] lr : gcov_event+0x28c/0x6b8 [ 8.899559][ T599] sp : ffffffc00e733b00 [ 8.899560][ T599] x29: ffffffc00e733b00 x28: ffffffc00e733d30 x27: ffffffe8dc297470 [ 8.899563][ T599] x26: ffffffe8dc297000 x25: ffffffe8dc297000 x24: ffffffe8dc297000 [ 8.899566][ T599] x23: ffffffe8dc0a6200 x22: ffffff880f68bf20 x21: 0000000000000000 [ 8.899569][ T599] x20: ffffff880f68bf00 x19: ffffff8801babc00 x18: ffffffc00d7f9058 [ 8.899572][ T599] x17: 0000000000088793 x16: ffffff80461cbe00 x15: 9100052952800785 [ 8.899575][ T599] x14: 0000000000000200 x13: 0000000000000041 x12: 9100052952800785 [ 8.899577][ T599] x11: ffffffe8dc297000 x10: ffffffe8dc297000 x9 : ffffff80461cbc80 [ 8.899580][ T599] x8 : ffffff8801babe80 x7 : ffffffe8dc2ec000 x6 : ffffffe8dc2ed000 [ 8.899583][ T599] x5 : 000000008020001f x4 : fffffffe2006eae0 x3 : 000000008020001f [ 8.899586][ T599] x2 : ffffff8027c49200 x1 : ffffff8801babc20 x0 : ffffff80461cb3a0 [ 8.899589][ T599] Call trace: [ 8.899590][ T599] gcov_info_add+0x9c/0xb8 [ 8.899592][ T599] gcov_module_notifier+0xbc/0x120 [ 8.899595][ T599] blocking_notifier_call_chain+0xa0/0x11c [ 8.899598][ T599] do_init_module+0x2a8/0x33c [ 8.899600][ T599] load_module+0x23cc/0x261c [ 8.899602][ T599] __arm64_sys_finit_module+0x158/0x194 [ 8.899604][ T599] invoke_syscall+0x94/0x2bc [ 8.899607][ T599] el0_svc_common+0x1d8/0x34c [ 8.899609][ T599] do_el0_svc+0x40/0x54 [ 8.899611][ T599] el0_svc+0x94/0x2f0 [ 8.899613][ T599] el0t_64_sync_handler+0x88/0xec [ 8.899615][ T599] el0t_64_sync+0x1b4/0x1b8 [ 8.899618][ T599] Code: f905f56c f86e69ec f86e6a0f 8b0c01ec (f82e6a0c) [ 8.899620][ T599] ---[ end trace ed5218e9e5b6e2e6 ]--- Link: 
https://lkml.kernel.org/r/1668020497-13142-1-git-send-email-quic_mojha@quicinc.com Fixes: e178a5beb369 ("gcov: clang support") Signed-off-by: Mukesh Ojha Reviewed-by: Peter Oberparleiter Tested-by: Peter Oberparleiter Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Tom Rix Cc: [5.2+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- kernel/gcov/clang.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index c466c7fbdece..ea6b45d0fa0d 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -327,6 +327,8 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) for (i = 0; i < sfn_ptr->num_counters; i++) dfn_ptr->counters[i] += sfn_ptr->counters[i]; + + sfn_ptr = list_next_entry(sfn_ptr, head); } } From d925dd3e444cb7f0fab0208fed82673fd61f9765 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 2 Aug 2022 12:28:11 -0400 Subject: [PATCH 1049/1477] mm: vmscan: fix extreme overreclaim and swap floods commit f53af4285d775cd9a9a146fc438bd0a1bee1838a upstream. During proactive reclaim, we sometimes observe severe overreclaim, with several thousand times more pages reclaimed than requested. This trace was obtained from shrink_lruvec() during such an instance: prio:0 anon_cost:1141521 file_cost:7767 nr_reclaimed:4387406 nr_to_reclaim:1047 (or_factor:4190) nr=[7161123 345 578 1111] While the reclaimer requested 4M, vmscan reclaimed close to 16G, most of it by swapping. These requests take over a minute, during which the write() to memory.reclaim is unkillably stuck inside the kernel. Digging into the source, this is caused by the proportional reclaim bailout logic. This code tries to resolve a fundamental conflict: to reclaim roughly what was requested, while also aging all LRUs fairly and in accordance to their size, swappiness, refault rates etc.
The way it attempts fairness is that once the reclaim goal has been reached, it stops scanning the LRUs with the smaller remaining scan targets, and adjusts the remainder of the bigger LRUs according to how much of the smaller LRUs was scanned. It then finishes scanning that remainder regardless of the reclaim goal. This works fine if priority levels are low and the LRU lists are comparable in size. However, in this instance, the cgroup that is targeted by proactive reclaim has almost no files left - they've already been squeezed out by proactive reclaim earlier - and the remaining anon pages are hot. Anon rotations cause the priority level to drop to 0, which results in reclaim targeting all of anon (a lot) and all of file (almost nothing). By the time reclaim decides to bail, it has scanned most or all of the file target, and therefore must also scan most or all of the enormous anon target. This target is thousands of times larger than the reclaim goal, thus causing the overreclaim. The bailout code hasn't changed in years, why is this failing now? The most likely explanations are two other recent changes in anon reclaim: 1. Before the series starting with commit 5df741963d52 ("mm: fix LRU balancing effect of new transparent huge pages"), the VM was overall relatively reluctant to swap at all, even if swap was configured. This means the LRU balancing code didn't come into play as often as it does now, and mostly in high pressure situations where pronounced swap activity wouldn't be as surprising. 2. For historic reasons, shrink_lruvec() loops on the scan targets of all LRU lists except the active anon one, meaning it would bail if the only remaining pages to scan were active anon - even if there were a lot of them. Before the series starting with commit ccc5dc67340c ("mm/vmscan: make active/inactive ratio as 1:1 for anon lru"), most anon pages would live on the active LRU; the inactive one would contain only a handful of preselected reclaim candidates.
After the series, anon gets aged similarly to file, and the inactive list is the default for new anon pages as well, making it often the much bigger list. As a result, the VM is now more likely to actually finish large anon targets than before. Change the code such that only one SWAP_CLUSTER_MAX-sized nudge toward the larger LRU lists is made before bailing out on a met reclaim goal. This fixes the extreme overreclaim problem. Fairness is more subtle and harder to evaluate. No obvious misbehavior was observed on the test workload, in any case. Conceptually, fairness should primarily be a cumulative effect from regular, lower priority scans. Once the VM is in trouble and needs to escalate scan targets to make forward progress, fairness needs to take a backseat. This is also acknowledged by the myriad exceptions in get_scan_count(). This patch makes fairness decrease gradually, as it keeps fairness work static over increasing priority levels with growing scan targets. This should make more sense - although we may have to re-visit the exact values. 
Link: https://lkml.kernel.org/r/20220802162811.39216-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Rik van Riel Acked-by: Mel Gorman Cc: Hugh Dickins Cc: Joonsoo Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index f2817e80a1ab..51ccd80e70b6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2439,8 +2439,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) enum lru_list lru; unsigned long nr_reclaimed = 0; unsigned long nr_to_reclaim = sc->nr_to_reclaim; + bool proportional_reclaim; struct blk_plug plug; - bool scan_adjusted; get_scan_count(lruvec, sc, nr); @@ -2458,8 +2458,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) * abort proportional reclaim if either the file or anon lru has already * dropped to zero at the first pass. */ - scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && - sc->priority == DEF_PRIORITY); + proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() && + sc->priority == DEF_PRIORITY); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -2479,7 +2479,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) cond_resched(); - if (nr_reclaimed < nr_to_reclaim || scan_adjusted) + if (nr_reclaimed < nr_to_reclaim || proportional_reclaim) continue; /* @@ -2530,8 +2530,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) nr_scanned = targets[lru] - nr[lru]; nr[lru] = targets[lru] * (100 - percentage) / 100; nr[lru] -= min(nr[lru], nr_scanned); - - scan_adjusted = true; } blk_finish_plug(&plug); sc->nr_reclaimed += nr_reclaimed; From 7e5cb13091e62b2ad2fe1d3a48753c7219a3e9f9 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Nov 2022 16:13:43 +0200 Subject: [PATCH 1050/1477] KVM: x86: nSVM: leave nested mode on vCPU free commit 
917401f26a6af5756d89b550a8e1bd50cf42b07e upstream. If the VM was terminated while nested, we free the nested state while the vCPU still is in nested mode. Soon a warning will be added for this condition. Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Message-Id: <20221103141351.50662-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/svm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2b7528821577..8f78cd8bfe92 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1357,6 +1357,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) */ svm_clear_current_vmcb(svm->vmcb); + svm_leave_nested(vcpu); svm_free_nested(svm); __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); From 3fdeacf087ff92c85162b8a0e111dfe2479238ac Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Nov 2022 16:13:51 +0200 Subject: [PATCH 1051/1477] KVM: x86: remove exit_int_info warning in svm_handle_exit commit 05311ce954aebe75935d9ae7d38ac82b5b796e33 upstream. It is valid to receive external interrupt and have broken IDT entry, which will lead to #GP with exit_int_info that will contain the index of the IDT entry (e.g. any value). Other exceptions can happen as well, like #NP or #SS (if stack switch fails). Thus this warning can be user triggered and has very little value.
Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Message-Id: <20221103141351.50662-10-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/svm.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8f78cd8bfe92..c34ba034ca11 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -305,12 +305,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) return 0; } -static int is_external_interrupt(u32 info) -{ - info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; - return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); -} - static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3116,15 +3110,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) return 0; } - if (is_external_interrupt(svm->vmcb->control.exit_int_info) && - exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && - exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && - exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) - printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " - "exit_code 0x%x\n", - __func__, svm->vmcb->control.exit_int_info, - exit_code); - if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; From 22870431cd250df3eec96753d9422bfc9f0d52a4 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 16 Nov 2022 10:41:24 -0800 Subject: [PATCH 1052/1477] x86/ioremap: Fix page aligned size calculation in __ioremap_caller() commit 4dbd6a3e90e03130973688fd79e19425f720d999 upstream. Current code re-calculates the size after aligning the starting and ending physical addresses on a page boundary. But the re-calculation also embeds the masking of high order bits that exceed the size of the physical address space (via PHYSICAL_PAGE_MASK). If the masking removes any high order bits, the size calculation results in a huge value that is likely to immediately fail. 
Fix this by re-calculating the page-aligned size first. Then mask any high order bits using PHYSICAL_PAGE_MASK. Fixes: ffa71f33a820 ("x86, ioremap: Fix incorrect physical address handling in PAE mode") Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Cc: Link: https://lore.kernel.org/r/1668624097-14884-2-git-send-email-mikelley@microsoft.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/ioremap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 91e61dbba3e0..88cb537ccdea 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -216,9 +216,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - phys_addr &= PHYSICAL_PAGE_MASK; + phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; + /* + * Mask out any bits not part of the actual physical + * address, like memory encryption bits. + */ + phys_addr &= PHYSICAL_PAGE_MASK; + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, pcm, &new_pcm); if (retval) { From 23e9d815fad84c1bee3742a8de4bd39510435362 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 30 Nov 2022 03:58:00 +0000 Subject: [PATCH 1053/1477] binder: avoid potential data leakage when copying txn commit 6d98eb95b450a75adb4516a1d33652dc78d2b20c upstream. Transactions are copied from the sender to the target first and objects like BINDER_TYPE_PTR and BINDER_TYPE_FDA are then fixed up. This means there is a short period where the sender's version of these objects are visible to the target prior to the fixups. Instead of copying all of the data first, copy data only after any needed fixups have been applied. 
Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Reviewed-by: Martijn Coenen Acked-by: Christian Brauner Signed-off-by: Todd Kjos Link: https://lore.kernel.org/r/20211130185152.437403-3-tkjos@google.com Signed-off-by: Greg Kroah-Hartman [cmllamas: fix trivial merge conflict] Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 94 ++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 24 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index cfb1393a0891..58ab76b7a787 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2008,15 +2008,21 @@ static void binder_cleanup_transaction(struct binder_transaction *t, /** * binder_get_object() - gets object and checks for valid metadata * @proc: binder_proc owning the buffer + * @u: sender's user pointer to base of buffer * @buffer: binder_buffer that we're parsing. * @offset: offset in the @buffer at which to validate an object. * @object: struct binder_object to read into * - * Return: If there's a valid metadata object at @offset in @buffer, the + * Copy the binder object at the given offset into @object. If @u is + * provided then the copy is from the sender's buffer. If not, then + * it is copied from the target's @buffer. + * + * Return: If there's a valid metadata object at @offset, the * size of that object. Otherwise, it returns zero. The object * is read into the struct binder_object pointed to by @object. 
*/ static size_t binder_get_object(struct binder_proc *proc, + const void __user *u, struct binder_buffer *buffer, unsigned long offset, struct binder_object *object) @@ -2026,10 +2032,16 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (offset > buffer->data_size || read_size < sizeof(*hdr) || - binder_alloc_copy_from_buffer(&proc->alloc, object, buffer, - offset, read_size)) + if (offset > buffer->data_size || read_size < sizeof(*hdr)) return 0; + if (u) { + if (copy_from_user(object, u + offset, read_size)) + return 0; + } else { + if (binder_alloc_copy_from_buffer(&proc->alloc, object, buffer, + offset, read_size)) + return 0; + } /* Ok, now see if we read a complete object. */ hdr = &object->hdr; @@ -2102,7 +2114,7 @@ static struct binder_buffer_object *binder_validate_ptr( b, buffer_offset, sizeof(object_offset))) return NULL; - object_size = binder_get_object(proc, b, object_offset, object); + object_size = binder_get_object(proc, NULL, b, object_offset, object); if (!object_size || object->hdr.type != BINDER_TYPE_PTR) return NULL; if (object_offsetp) @@ -2167,7 +2179,8 @@ static bool binder_validate_fixup(struct binder_proc *proc, unsigned long buffer_offset; struct binder_object last_object; struct binder_buffer_object *last_bbo; - size_t object_size = binder_get_object(proc, b, last_obj_offset, + size_t object_size = binder_get_object(proc, NULL, b, + last_obj_offset, &last_object); if (object_size != sizeof(*last_bbo)) return false; @@ -2282,7 +2295,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, if (!binder_alloc_copy_from_buffer(&proc->alloc, &object_offset, buffer, buffer_offset, sizeof(object_offset))) - object_size = binder_get_object(proc, buffer, + object_size = binder_get_object(proc, NULL, buffer, object_offset, &object); if (object_size == 0) { pr_err("transaction release %d bad object at offset %lld, 
size %zd\n", @@ -2848,6 +2861,7 @@ static void binder_transaction(struct binder_proc *proc, binder_size_t off_start_offset, off_end_offset; binder_size_t off_min; binder_size_t sg_buf_offset, sg_buf_end_offset; + binder_size_t user_offset = 0; struct binder_proc *target_proc = NULL; struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; @@ -2862,6 +2876,8 @@ static void binder_transaction(struct binder_proc *proc, int t_debug_id = atomic_inc_return(&binder_last_id); char *secctx = NULL; u32 secctx_sz = 0; + const void __user *user_buffer = (const void __user *) + (uintptr_t)tr->data.ptr.buffer; e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; @@ -3173,19 +3189,6 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->clear_on_free = !!(t->flags & TF_CLEAR_BUF); trace_binder_transaction_alloc_buf(t->buffer); - if (binder_alloc_copy_user_to_buffer( - &target_proc->alloc, - t->buffer, 0, - (const void __user *) - (uintptr_t)tr->data.ptr.buffer, - tr->data_size)) { - binder_user_error("%d:%d got transaction with invalid data ptr\n", - proc->pid, thread->pid); - return_error = BR_FAILED_REPLY; - return_error_param = -EFAULT; - return_error_line = __LINE__; - goto err_copy_data_failed; - } if (binder_alloc_copy_user_to_buffer( &target_proc->alloc, t->buffer, @@ -3230,6 +3233,7 @@ static void binder_transaction(struct binder_proc *proc, size_t object_size; struct binder_object object; binder_size_t object_offset; + binder_size_t copy_size; if (binder_alloc_copy_from_buffer(&target_proc->alloc, &object_offset, @@ -3241,8 +3245,27 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } - object_size = binder_get_object(target_proc, t->buffer, - object_offset, &object); + + /* + * Copy the source user buffer up to the next object + * that will be processed. 
+ */ + copy_size = object_offset - user_offset; + if (copy_size && (user_offset > object_offset || + binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, user_offset, + user_buffer + user_offset, + copy_size))) { + binder_user_error("%d:%d got transaction with invalid data ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; + } + object_size = binder_get_object(target_proc, user_buffer, + t->buffer, object_offset, &object); if (object_size == 0 || object_offset < off_min) { binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n", proc->pid, thread->pid, @@ -3254,6 +3277,11 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } + /* + * Set offset to the next buffer fragment to be + * copied + */ + user_offset = object_offset + object_size; hdr = &object.hdr; off_min = object_offset + object_size; @@ -3349,9 +3377,14 @@ static void binder_transaction(struct binder_proc *proc, } ret = binder_translate_fd_array(fda, parent, t, thread, in_reply_to); - if (ret < 0) { + if (!ret) + ret = binder_alloc_copy_to_buffer(&target_proc->alloc, + t->buffer, + object_offset, + fda, sizeof(*fda)); + if (ret) { return_error = BR_FAILED_REPLY; - return_error_param = ret; + return_error_param = ret > 0 ? 
-EINVAL : ret; return_error_line = __LINE__; goto err_translate_failed; } @@ -3421,6 +3454,19 @@ static void binder_transaction(struct binder_proc *proc, goto err_bad_object_type; } } + /* Done processing objects, copy the rest of the buffer */ + if (binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, user_offset, + user_buffer + user_offset, + tr->data_size - user_offset)) { + binder_user_error("%d:%d got transaction with invalid data ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; + } tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; t->work.type = BINDER_WORK_TRANSACTION; From 5204296fc76623552d53f042e2dc411b49c151f2 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 30 Nov 2022 03:58:01 +0000 Subject: [PATCH 1054/1477] binder: read pre-translated fds from sender buffer commit 656e01f3ab54afe71bed066996fc2640881e1220 upstream. This patch is to prepare for an up coming patch where we read pre-translated fds from the sender buffer and translate them before copying them to the target. It does not change run time. The patch adds two new parameters to binder_translate_fd_array() to hold the sender buffer and sender buffer parent. These parameters let us call copy_from_user() directly from the sender instead of using binder_alloc_copy_from_buffer() to copy from the target. Also the patch adds some new alignment checks. Previously the alignment checks would have been done in a different place, but this lets us print more useful error messages. 
Reviewed-by: Martijn Coenen Acked-by: Christian Brauner Signed-off-by: Todd Kjos Link: https://lore.kernel.org/r/20211130185152.437403-4-tkjos@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 58ab76b7a787..4abb621300ec 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2634,15 +2634,17 @@ err_fd_not_accepted: } static int binder_translate_fd_array(struct binder_fd_array_object *fda, + const void __user *sender_ubuffer, struct binder_buffer_object *parent, + struct binder_buffer_object *sender_uparent, struct binder_transaction *t, struct binder_thread *thread, struct binder_transaction *in_reply_to) { binder_size_t fdi, fd_buf_size; binder_size_t fda_offset; + const void __user *sender_ufda_base; struct binder_proc *proc = thread->proc; - struct binder_proc *target_proc = t->to_proc; fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { @@ -2666,7 +2668,10 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, */ fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) + fda->parent_offset; - if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32))) { + sender_ufda_base = (void __user *)sender_uparent->buffer + fda->parent_offset; + + if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) || + !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) { binder_user_error("%d:%d parent offset not aligned correctly.\n", proc->pid, thread->pid); return -EINVAL; @@ -2675,10 +2680,9 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, u32 fd; int ret; binder_size_t offset = fda_offset + fdi * sizeof(fd); + binder_size_t sender_uoffset = fdi * sizeof(fd); - ret = binder_alloc_copy_from_buffer(&target_proc->alloc, - 
&fd, t->buffer, - offset, sizeof(fd)); + ret = copy_from_user(&fd, sender_ufda_base + sender_uoffset, sizeof(fd)); if (!ret) ret = binder_translate_fd(fd, offset, t, thread, in_reply_to); @@ -3344,6 +3348,8 @@ static void binder_transaction(struct binder_proc *proc, case BINDER_TYPE_FDA: { struct binder_object ptr_object; binder_size_t parent_offset; + struct binder_object user_object; + size_t user_parent_size; struct binder_fd_array_object *fda = to_binder_fd_array_object(hdr); size_t num_valid = (buffer_offset - off_start_offset) / @@ -3375,8 +3381,27 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_parent; } - ret = binder_translate_fd_array(fda, parent, t, thread, - in_reply_to); + /* + * We need to read the user version of the parent + * object to get the original user offset + */ + user_parent_size = + binder_get_object(proc, user_buffer, t->buffer, + parent_offset, &user_object); + if (user_parent_size != sizeof(user_object.bbo)) { + binder_user_error("%d:%d invalid ptr object size: %zd vs %zd\n", + proc->pid, thread->pid, + user_parent_size, + sizeof(user_object.bbo)); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_parent; + } + ret = binder_translate_fd_array(fda, user_buffer, + parent, + &user_object.bbo, t, + thread, in_reply_to); if (!ret) ret = binder_alloc_copy_to_buffer(&target_proc->alloc, t->buffer, From c9d3f25a7f4e3aab3dfd91885e3d428bccdcb0e1 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 30 Nov 2022 03:58:02 +0000 Subject: [PATCH 1055/1477] binder: defer copies of pre-patched txn data commit 09184ae9b5756cc469db6fd1d1cfdcffbf627c2d upstream. BINDER_TYPE_PTR objects point to memory areas in the source process to be copied into the target buffer as part of a transaction. This implements a scatter- gather model where non-contiguous memory in a source process is "gathered" into a contiguous region in the target buffer. 
The data can include pointers that must be fixed up to correctly point to the copied data. To avoid making source process pointers visible to the target process, this patch defers the copy until the fixups are known and then copies and fixups are done together. There is a special case of BINDER_TYPE_FDA which applies the fixup later in the target process context. In this case the user data is skipped (so no untranslated fds become visible to the target). Reviewed-by: Martijn Coenen Signed-off-by: Todd Kjos Link: https://lore.kernel.org/r/20211130185152.437403-5-tkjos@google.com Signed-off-by: Greg Kroah-Hartman [cmllamas: fix trivial merge conflict] Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 299 +++++++++++++++++++++++++++++++++++---- 1 file changed, 274 insertions(+), 25 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 4abb621300ec..83c4501153b4 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2633,7 +2633,246 @@ err_fd_not_accepted: return ret; } -static int binder_translate_fd_array(struct binder_fd_array_object *fda, +/** + * struct binder_ptr_fixup - data to be fixed-up in target buffer + * @offset offset in target buffer to fixup + * @skip_size bytes to skip in copy (fixup will be written later) + * @fixup_data data to write at fixup offset + * @node list node + * + * This is used for the pointer fixup list (pf) which is created and consumed + * during binder_transaction() and is only accessed locally. No + * locking is necessary. + * + * The list is ordered by @offset.
+ */ +struct binder_ptr_fixup { + binder_size_t offset; + size_t skip_size; + binder_uintptr_t fixup_data; + struct list_head node; +}; + +/** + * struct binder_sg_copy - scatter-gather data to be copied + * @offset offset in target buffer + * @sender_uaddr user address in source buffer + * @length bytes to copy + * @node list node + * + * This is used for the sg copy list (sgc) which is created and consumed + * during binder_transaction() and is only accessed locally. No + * locking is necessary. + * + * The list is ordered by @offset. + */ +struct binder_sg_copy { + binder_size_t offset; + const void __user *sender_uaddr; + size_t length; + struct list_head node; +}; + +/** + * binder_do_deferred_txn_copies() - copy and fixup scatter-gather data + * @alloc: binder_alloc associated with @buffer + * @buffer: binder buffer in target process + * @sgc_head: list_head of scatter-gather copy list + * @pf_head: list_head of pointer fixup list + * + * Processes all elements of @sgc_head, applying fixups from @pf_head + * and copying the scatter-gather data from the source process' user + * buffer to the target's buffer. It is expected that the list creation + * and processing all occurs during binder_transaction() so these lists + * are only accessed in local context. + * + * Return: 0=success, else -errno + */ +static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, + struct binder_buffer *buffer, + struct list_head *sgc_head, + struct list_head *pf_head) +{ + int ret = 0; + struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *pf = + list_first_entry_or_null(pf_head, struct binder_ptr_fixup, + node); + + list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { + size_t bytes_copied = 0; + + while (bytes_copied < sgc->length) { + size_t copy_size; + size_t bytes_left = sgc->length - bytes_copied; + size_t offset = sgc->offset + bytes_copied; + + /* + * We copy up to the fixup (pointed to by pf) + */ + copy_size = pf ? 
min(bytes_left, (size_t)pf->offset - offset) + : bytes_left; + if (!ret && copy_size) + ret = binder_alloc_copy_user_to_buffer( + alloc, buffer, + offset, + sgc->sender_uaddr + bytes_copied, + copy_size); + bytes_copied += copy_size; + if (copy_size != bytes_left) { + BUG_ON(!pf); + /* we stopped at a fixup offset */ + if (pf->skip_size) { + /* + * we are just skipping. This is for + * BINDER_TYPE_FDA where the translated + * fds will be fixed up when we get + * to target context. + */ + bytes_copied += pf->skip_size; + } else { + /* apply the fixup indicated by pf */ + if (!ret) + ret = binder_alloc_copy_to_buffer( + alloc, buffer, + pf->offset, + &pf->fixup_data, + sizeof(pf->fixup_data)); + bytes_copied += sizeof(pf->fixup_data); + } + list_del(&pf->node); + kfree(pf); + pf = list_first_entry_or_null(pf_head, + struct binder_ptr_fixup, node); + } + } + list_del(&sgc->node); + kfree(sgc); + } + BUG_ON(!list_empty(pf_head)); + BUG_ON(!list_empty(sgc_head)); + + return ret > 0 ? -EINVAL : ret; +} + +/** + * binder_cleanup_deferred_txn_lists() - free specified lists + * @sgc_head: list_head of scatter-gather copy list + * @pf_head: list_head of pointer fixup list + * + * Called to clean up @sgc_head and @pf_head if there is an + * error. + */ +static void binder_cleanup_deferred_txn_lists(struct list_head *sgc_head, + struct list_head *pf_head) +{ + struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *pf, *tmppf; + + list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { + list_del(&sgc->node); + kfree(sgc); + } + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + list_del(&pf->node); + kfree(pf); + } +} + +/** + * binder_defer_copy() - queue a scatter-gather buffer for copy + * @sgc_head: list_head of scatter-gather copy list + * @offset: binder buffer offset in target process + * @sender_uaddr: user address in source process + * @length: bytes to copy + * + * Specify a scatter-gather block to be copied. 
The actual copy must + * be deferred until all the needed fixups are identified and queued. + * Then the copy and fixups are done together so un-translated values + * from the source are never visible in the target buffer. + * + * We are guaranteed that repeated calls to this function will have + * monotonically increasing @offset values so the list will naturally + * be ordered. + * + * Return: 0=success, else -errno + */ +static int binder_defer_copy(struct list_head *sgc_head, binder_size_t offset, + const void __user *sender_uaddr, size_t length) +{ + struct binder_sg_copy *bc = kzalloc(sizeof(*bc), GFP_KERNEL); + + if (!bc) + return -ENOMEM; + + bc->offset = offset; + bc->sender_uaddr = sender_uaddr; + bc->length = length; + INIT_LIST_HEAD(&bc->node); + + /* + * We are guaranteed that the deferred copies are in-order + * so just add to the tail. + */ + list_add_tail(&bc->node, sgc_head); + + return 0; +} + +/** + * binder_add_fixup() - queue a fixup to be applied to sg copy + * @pf_head: list_head of binder ptr fixup list + * @offset: binder buffer offset in target process + * @fixup: bytes to be copied for fixup + * @skip_size: bytes to skip when copying (fixup will be applied later) + * + * Add the specified fixup to a list ordered by @offset. When copying + * the scatter-gather buffers, the fixup will be copied instead of + * data from the source buffer. For BINDER_TYPE_FDA fixups, the fixup + * will be applied later (in target process context), so we just skip + * the bytes specified by @skip_size. If @skip_size is 0, we copy the + * value in @fixup. + * + * This function is called *mostly* in @offset order, but there are + * exceptions. Since out-of-order inserts are relatively uncommon, + * we insert the new element by searching backward from the tail of + * the list. 
+ * + * Return: 0=success, else -errno + */ +static int binder_add_fixup(struct list_head *pf_head, binder_size_t offset, + binder_uintptr_t fixup, size_t skip_size) +{ + struct binder_ptr_fixup *pf = kzalloc(sizeof(*pf), GFP_KERNEL); + struct binder_ptr_fixup *tmppf; + + if (!pf) + return -ENOMEM; + + pf->offset = offset; + pf->fixup_data = fixup; + pf->skip_size = skip_size; + INIT_LIST_HEAD(&pf->node); + + /* Fixups are *mostly* added in-order, but there are some + * exceptions. Look backwards through list for insertion point. + */ + list_for_each_entry_reverse(tmppf, pf_head, node) { + if (tmppf->offset < pf->offset) { + list_add(&pf->node, &tmppf->node); + return 0; + } + } + /* + * if we get here, then the new offset is the lowest so + * insert at the head + */ + list_add(&pf->node, pf_head); + return 0; +} + +static int binder_translate_fd_array(struct list_head *pf_head, + struct binder_fd_array_object *fda, const void __user *sender_ubuffer, struct binder_buffer_object *parent, struct binder_buffer_object *sender_uparent, @@ -2645,6 +2884,7 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, binder_size_t fda_offset; const void __user *sender_ufda_base; struct binder_proc *proc = thread->proc; + int ret; fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { @@ -2676,9 +2916,12 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, proc->pid, thread->pid); return -EINVAL; } + ret = binder_add_fixup(pf_head, fda_offset, 0, fda->num_fds * sizeof(u32)); + if (ret) + return ret; + for (fdi = 0; fdi < fda->num_fds; fdi++) { u32 fd; - int ret; binder_size_t offset = fda_offset + fdi * sizeof(fd); binder_size_t sender_uoffset = fdi * sizeof(fd); @@ -2692,7 +2935,8 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, return 0; } -static int binder_fixup_parent(struct binder_transaction *t, +static int binder_fixup_parent(struct list_head *pf_head, + struct 
binder_transaction *t, struct binder_thread *thread, struct binder_buffer_object *bp, binder_size_t off_start_offset, @@ -2738,14 +2982,7 @@ static int binder_fixup_parent(struct binder_transaction *t, } buffer_offset = bp->parent_offset + (uintptr_t)parent->buffer - (uintptr_t)b->user_data; - if (binder_alloc_copy_to_buffer(&target_proc->alloc, b, buffer_offset, - &bp->buffer, sizeof(bp->buffer))) { - binder_user_error("%d:%d got transaction with invalid parent offset\n", - proc->pid, thread->pid); - return -EINVAL; - } - - return 0; + return binder_add_fixup(pf_head, buffer_offset, bp->buffer, 0); } /** @@ -2880,8 +3117,12 @@ static void binder_transaction(struct binder_proc *proc, int t_debug_id = atomic_inc_return(&binder_last_id); char *secctx = NULL; u32 secctx_sz = 0; + struct list_head sgc_head; + struct list_head pf_head; const void __user *user_buffer = (const void __user *) (uintptr_t)tr->data.ptr.buffer; + INIT_LIST_HEAD(&sgc_head); + INIT_LIST_HEAD(&pf_head); e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; @@ -3398,8 +3639,8 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_parent; } - ret = binder_translate_fd_array(fda, user_buffer, - parent, + ret = binder_translate_fd_array(&pf_head, fda, + user_buffer, parent, &user_object.bbo, t, thread, in_reply_to); if (!ret) @@ -3431,19 +3672,14 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } - if (binder_alloc_copy_user_to_buffer( - &target_proc->alloc, - t->buffer, - sg_buf_offset, - (const void __user *) - (uintptr_t)bp->buffer, - bp->length)) { - binder_user_error("%d:%d got transaction with invalid offsets ptr\n", - proc->pid, thread->pid); - return_error_param = -EFAULT; + ret = binder_defer_copy(&sgc_head, sg_buf_offset, + (const void __user *)(uintptr_t)bp->buffer, + bp->length); + if (ret) { return_error = BR_FAILED_REPLY; + return_error_param = ret; 
return_error_line = __LINE__; - goto err_copy_data_failed; + goto err_translate_failed; } /* Fixup buffer pointer to target proc address space */ bp->buffer = (uintptr_t) @@ -3452,7 +3688,8 @@ static void binder_transaction(struct binder_proc *proc, num_valid = (buffer_offset - off_start_offset) / sizeof(binder_size_t); - ret = binder_fixup_parent(t, thread, bp, + ret = binder_fixup_parent(&pf_head, t, + thread, bp, off_start_offset, num_valid, last_fixup_obj_off, @@ -3492,6 +3729,17 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_copy_data_failed; } + + ret = binder_do_deferred_txn_copies(&target_proc->alloc, t->buffer, + &sgc_head, &pf_head); + if (ret) { + binder_user_error("%d:%d got transaction with invalid offsets ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_copy_data_failed; + } tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; t->work.type = BINDER_WORK_TRANSACTION; @@ -3558,6 +3806,7 @@ err_bad_object_type: err_bad_offset: err_bad_parent: err_copy_data_failed: + binder_cleanup_deferred_txn_lists(&sgc_head, &pf_head); binder_free_txn_fixups(t); trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, NULL, t->buffer, From 2e3c27f24173c6f3d799080da82126fa044a2f5e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Nov 2022 03:58:03 +0000 Subject: [PATCH 1056/1477] binder: fix pointer cast warning commit 9a0a930fe2535a76ad70d3f43caeccf0d86a3009 upstream. 
binder_uintptr_t is not the same as uintptr_t, so converting it into a pointer requires a second cast: drivers/android/binder.c: In function 'binder_translate_fd_array': drivers/android/binder.c:2511:28: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] 2511 | sender_ufda_base = (void __user *)sender_uparent->buffer + fda->parent_offset; | ^ Fixes: 656e01f3ab54 ("binder: read pre-translated fds from sender buffer") Acked-by: Todd Kjos Acked-by: Randy Dunlap # build-tested Acked-by: Christian Brauner Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211207122448.1185769-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 83c4501153b4..398ce65b578b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2908,7 +2908,8 @@ static int binder_translate_fd_array(struct list_head *pf_head, */ fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) + fda->parent_offset; - sender_ufda_base = (void __user *)sender_uparent->buffer + fda->parent_offset; + sender_ufda_base = (void __user *)(uintptr_t)sender_uparent->buffer + + fda->parent_offset; if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) || !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) { From 017de842533f4334d646f1d480f591f4ca9f5c7a Mon Sep 17 00:00:00 2001 From: Alessandro Astone Date: Wed, 30 Nov 2022 03:58:04 +0000 Subject: [PATCH 1057/1477] binder: Address corner cases in deferred copy and fixup commit 2d1746e3fda0c3612143d7c06f8e1d1830c13e23 upstream. When handling BINDER_TYPE_FDA object we are pushing a parent fixup with a certain skip_size but no scatter-gather copy object, since the copy is handled standalone. 
If BINDER_TYPE_FDA is the last child the scatter-gather copy loop will never stop to skip it, thus we are left with an item in the parent fixup list. This will trigger the BUG_ON(). This is reproducible in android when playing a video. We receive a transaction that looks like this: obj[0] BINDER_TYPE_PTR, parent obj[1] BINDER_TYPE_PTR, child obj[2] BINDER_TYPE_PTR, child obj[3] BINDER_TYPE_FDA, child Fixes: 09184ae9b575 ("binder: defer copies of pre-patched txn data") Acked-by: Todd Kjos Cc: stable Signed-off-by: Alessandro Astone Link: https://lore.kernel.org/r/20220415120015.52684-2-ales.astone@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 398ce65b578b..1ba8a98094b4 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2695,6 +2695,7 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, { int ret = 0; struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *tmppf; struct binder_ptr_fixup *pf = list_first_entry_or_null(pf_head, struct binder_ptr_fixup, node); @@ -2749,7 +2750,11 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, list_del(&sgc->node); kfree(sgc); } - BUG_ON(!list_empty(pf_head)); + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + BUG_ON(pf->skip_size == 0); + list_del(&pf->node); + kfree(pf); + } BUG_ON(!list_empty(sgc_head)); return ret > 0 ? -EINVAL : ret; From ae9e0cc973fb7499ea1b1a8dfd0795f728b84faf Mon Sep 17 00:00:00 2001 From: Alessandro Astone Date: Wed, 30 Nov 2022 03:58:05 +0000 Subject: [PATCH 1058/1477] binder: Gracefully handle BINDER_TYPE_FDA objects with num_fds=0 commit ef38de9217a04c9077629a24652689d8fdb4c6c6 upstream. Some android userspace is sending BINDER_TYPE_FDA objects with num_fds=0.
Like the previous patch, this is reproducible when playing a video. Before commit 09184ae9b575 BINDER_TYPE_FDA objects with num_fds=0 were 'correctly handled', as in no fixup was performed. After commit 09184ae9b575 we aggregate fixup and skip regions in binder_ptr_fixup structs and distinguish between the two by using the skip_size field: if it's 0, then it's a fixup, otherwise skip. When processing BINDER_TYPE_FDA objects with num_fds=0 we add a skip region of skip_size=0, and this causes issues because now binder_do_deferred_txn_copies will think this was a fixup region. To address that, return early from binder_translate_fd_array to avoid adding an empty skip region. Fixes: 09184ae9b575 ("binder: defer copies of pre-patched txn data") Acked-by: Todd Kjos Cc: stable Signed-off-by: Alessandro Astone Link: https://lore.kernel.org/r/20220415120015.52684-1-ales.astone@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 1ba8a98094b4..4473adef2f5a 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2891,6 +2891,9 @@ static int binder_translate_fd_array(struct list_head *pf_head, struct binder_proc *proc = thread->proc; int ret; + if (fda->num_fds == 0) + return 0; + fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n", From 36e0b976196ce56a599802f485693bd93d237891 Mon Sep 17 00:00:00 2001 From: Aman Dhoot Date: Sat, 15 Oct 2022 20:41:17 -0700 Subject: [PATCH 1059/1477] Input: synaptics - switch touchpad on HP Laptop 15-da3001TU to RMI mode [ Upstream commit ac5408991ea6b06e29129b4d4861097c4c3e0d59 ] The device works fine in native RMI mode, there is no reason to use legacy PS/2 mode with it. 
Signed-off-by: Aman Dhoot Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 82577095e175..f1013b950d57 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -191,6 +191,7 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ + "SYN3286", /* HP Laptop 15-da3001TU */ NULL }; From 7c3e39ccf5bd5b1e48f725f122b44ab692498d35 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 25 Oct 2022 16:09:42 +0200 Subject: [PATCH 1060/1477] ASoC: Intel: bytcht_es8316: Add quirk for the Nanote UMPC-01 [ Upstream commit 8bb0ac0e6f64ebdf15d963c26b028de391c9bcf9 ] The Nanote UMPC-01 mini laptop has stereo speakers, while the default bytcht_es8316 settings assume a mono speaker setup. Add a quirk for this. Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20221025140942.509066-1-hdegoede@redhat.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/bytcht_es8316.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index 7ed869bf1a92..81269ed5a2aa 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -450,6 +450,13 @@ static const struct dmi_system_id byt_cht_es8316_quirk_table[] = { | BYT_CHT_ES8316_INTMIC_IN2_MAP | BYT_CHT_ES8316_JD_INVERTED), }, + { /* Nanote UMPC-01 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "RWC CO.,LTD"), + DMI_MATCH(DMI_PRODUCT_NAME, "UMPC-01"), + }, + .driver_data = (void *)BYT_CHT_ES8316_INTMIC_IN1_MAP, + }, { /* Teclast X98 Plus II */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), From f4db0509587aa33491168354b924dd579f12c987 Mon Sep 17 00:00:00 2001 From: 
Lukas Wunner Date: Tue, 27 Sep 2022 13:52:34 +0200 Subject: [PATCH 1061/1477] serial: 8250: 8250_omap: Avoid RS485 RTS glitch on ->set_termios() [ Upstream commit 038ee49fef18710bedd38b531d173ccd746b2d8d ] RS485-enabled UART ports on TI Sitara SoCs with active-low polarity exhibit a Transmit Enable glitch on ->set_termios(): omap8250_restore_regs(), which is called from omap_8250_set_termios(), sets the TCRTLR bit in the MCR register and clears all other bits, including RTS. If RTS uses active-low polarity, it is now asserted for no reason. The TCRTLR bit is subsequently cleared by writing up->mcr to the MCR register. That variable is always zero, so the RTS bit is still cleared (incorrectly so if RTS is active-high). (up->mcr is not, as one might think, a cache of the MCR register's current value. Rather, it only caches a single bit of that register, the AFE bit. And it only does so if the UART supports the AFE bit, which OMAP does not. For details see serial8250_do_set_termios() and serial8250_do_set_mctrl().) Finally at the end of omap8250_restore_regs(), the MCR register is restored (and RTS deasserted) by a call to up->port.ops->set_mctrl() (which equals serial8250_set_mctrl()) and serial8250_em485_stop_tx(). So there's an RTS glitch between setting TCRTLR and calling serial8250_em485_stop_tx(). Avoid by using a read-modify-write when setting TCRTLR. While at it, drop a redundant initialization of up->mcr. As explained above, the variable isn't used by the driver and it is already initialized to zero because it is part of the static struct serial8250_ports[] declared in 8250_core.c. (Static structs are initialized to zero per section 6.7.8 nr. 10 of the C99 standard.) 
Cc: Jan Kiszka Cc: Su Bao Cheng Tested-by: Matthias Schiffer Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/6554b0241a2c7fd50f32576fdbafed96709e11e8.1664278942.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_omap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 3f7379f16a36..483fff3a95c9 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -293,6 +293,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) { struct omap8250_priv *priv = up->port.private_data; struct uart_8250_dma *dma = up->dma; + u8 mcr = serial8250_in_MCR(up); if (dma && dma->tx_running) { /* @@ -309,7 +310,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) serial_out(up, UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); - serial8250_out_MCR(up, UART_MCR_TCRTLR); + serial8250_out_MCR(up, mcr | UART_MCR_TCRTLR); serial_out(up, UART_FCR, up->fcr); omap8250_update_scr(up, priv); @@ -325,7 +326,8 @@ static void omap8250_restore_regs(struct uart_8250_port *up) serial_out(up, UART_LCR, 0); /* drop TCR + TLR access, we setup XON/XOFF later */ - serial8250_out_MCR(up, up->mcr); + serial8250_out_MCR(up, mcr); + serial_out(up, UART_IER, up->ier); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); @@ -684,7 +686,6 @@ static int omap_8250_startup(struct uart_port *port) pm_runtime_get_sync(port->dev); - up->mcr = 0; serial_out(up, UART_FCR, UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); serial_out(up, UART_LCR, UART_LCR_WLEN8); From c1620e996d0a4ed7a3001cca755cff013944850d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 3 Nov 2022 11:31:43 -0700 Subject: [PATCH 1062/1477] Input: goodix - try resetting the controller when no config is set [ Upstream commit c7e37cc6240767f794678d11704935d49cc81d59 ] On ACPI systems (irq_pin_access_method 
== IRQ_PIN_ACCESS_ACPI_*) the driver does not reset the controller at probe time, because sometimes the system firmware loads a config and resetting might lose this config. On the Nanote UMPC-01 device OTOH the config is in flash of the controller, the controller needs a reset to load this; and the system firmware does not reset the controller on a cold boot. To fix the Nanote UMPC-01 touchscreen not working on a cold boot, try resetting the controller and then re-reading the config when encountering a config with 0 width/height/max_touch_num value and the controller has not already been reset by goodix_ts_probe(). This should be safe to do in general because normally we should never encounter a config with 0 width/height/max_touch_num. Doing this in general not only avoids the need for a DMI quirk, but also might help other systems. Signed-off-by: Hans de Goede Reviewed-by: Bastien Nocera Link: https://lore.kernel.org/r/20221025122930.421377-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/goodix.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index b23abde5d7db..b7f87ad4b9a9 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -1059,6 +1059,7 @@ static int goodix_configure_dev(struct goodix_ts_data *ts) input_set_abs_params(ts->input_dev, ABS_MT_WIDTH_MAJOR, 0, 255, 0, 0); input_set_abs_params(ts->input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); +retry_read_config: /* Read configuration and apply touchscreen parameters */ goodix_read_config(ts); @@ -1066,6 +1067,16 @@ static int goodix_configure_dev(struct goodix_ts_data *ts) touchscreen_parse_properties(ts->input_dev, true, &ts->prop); if (!ts->prop.max_x || !ts->prop.max_y || !ts->max_touch_num) { + if (!ts->reset_controller_at_probe && + ts->irq_pin_access_method != IRQ_PIN_ACCESS_NONE) { + dev_info(&ts->client->dev, "Config
not set, resetting controller\n"); + /* Retry after a controller reset */ + ts->reset_controller_at_probe = true; + error = goodix_reset(ts); + if (error) + return error; + goto retry_read_config; + } dev_err(&ts->client->dev, "Invalid config (%d, %d, %d), using defaults\n", ts->prop.max_x, ts->prop.max_y, ts->max_touch_num); From 4ea4316dffda7d83e1997a72f10f8933b9ddb997 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Nov 2022 10:30:25 -0800 Subject: [PATCH 1063/1477] Input: soc_button_array - add use_low_level_irq module parameter [ Upstream commit 8e9ada1d0e72b4737df400fe1bba48dc42a68df7 ] It seems that the Windows drivers for the ACPI0011 soc_button_array device use low level triggered IRQs rather than using edge triggering. Some ACPI tables depend on this, directly poking the GPIO controller's registers to clear the trigger type when closing a laptop's/2-in-1's lid and re-instating the trigger when opening the lid again. Linux sets the edge/level on which to trigger to both low+high since it is using edge type IRQs, the ACPI tables then end up also setting the bit for level IRQs and since both low and high level have been selected by Linux we get an IRQ storm leading to soft lockups. As a workaround for this the soc_button_array already contains a DMI quirk table with device models known to have this issue. Add a module parameter for this so that users can easily test if their device is affected too and so that they can use the module parameter as a workaround.
Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221106215320.67109-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/misc/soc_button_array.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index efffcf0ebd3b..46ba8218de99 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -18,6 +18,10 @@ #include #include +static bool use_low_level_irq; +module_param(use_low_level_irq, bool, 0444); +MODULE_PARM_DESC(use_low_level_irq, "Use low-level triggered IRQ instead of edge triggered"); + struct soc_button_info { const char *name; int acpi_index; @@ -164,7 +168,8 @@ soc_button_device_create(struct platform_device *pdev, } /* See dmi_use_low_level_irq[] comment */ - if (!autorepeat && dmi_check_system(dmi_use_low_level_irq)) { + if (!autorepeat && (use_low_level_irq || + dmi_check_system(dmi_use_low_level_irq))) { irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); gpio_keys[n_buttons].irq = irq; gpio_keys[n_buttons].gpio = -ENOENT; From 9bbb587472435b22414a6c0519fc1dedad1b3132 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Nov 2022 10:30:41 -0800 Subject: [PATCH 1064/1477] Input: soc_button_array - add Acer Switch V 10 to dmi_use_low_level_irq[] [ Upstream commit e13757f52496444b994a7ac67b6e517a15d89bbc ] Like on the Acer Switch 10 SW5-012, the Acer Switch V 10 SW5-017's _LID method messes with home- and power-button GPIO IRQ settings, causing an IRQ storm. Add a quirk entry for the Acer Switch V 10 to the dmi_use_low_level_irq[] DMI quirk list, to use low-level IRQs on this model, fixing the IRQ storm. 
Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221106215320.67109-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/misc/soc_button_array.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index 46ba8218de99..31c02c2019c1 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -77,6 +77,13 @@ static const struct dmi_system_id dmi_use_low_level_irq[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"), }, }, + { + /* Acer Switch V 10 SW5-017, same issue as Acer Switch 10 SW5-012. */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "SW5-017"), + }, + }, { /* * Acer One S1003. _LID method messes with power-button GPIO From f45a5a6c9f6d2967c075467fb7ad39b3a39e9732 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 14 Nov 2022 11:31:08 +0100 Subject: [PATCH 1065/1477] xen-pciback: Allow setting PCI_MSIX_FLAGS_MASKALL too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5e29500eba2aa19e1323df46f64dafcd4a327092 ] When Xen domain configures MSI-X, the usual approach is to enable MSI-X together with masking all of them via the config space, then fill the table and only then clear PCI_MSIX_FLAGS_MASKALL. Allow doing this via QEMU running in a stub domain. Previously, when changing PCI_MSIX_FLAGS_MASKALL was not allowed, the whole write was aborted, preventing change to the PCI_MSIX_FLAGS_ENABLE bit too. Note the Xen hypervisor intercepts this write anyway, and may keep the PCI_MSIX_FLAGS_MASKALL bit set if it wishes to. It will store the guest-requested state and will apply it eventually. 
Signed-off-by: Marek Marczykowski-Górecki Reviewed-by: Jan Beulich Link: https://lore.kernel.org/r/20221114103110.1519413-1-marmarek@invisiblethingslab.com Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/xen-pciback/conf_space_capability.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c index 5e53b4817f16..097316a74126 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -190,13 +190,16 @@ static const struct config_field caplist_pm[] = { }; static struct msi_msix_field_config { - u16 enable_bit; /* bit for enabling MSI/MSI-X */ - unsigned int int_type; /* interrupt type for exclusiveness check */ + u16 enable_bit; /* bit for enabling MSI/MSI-X */ + u16 allowed_bits; /* bits allowed to be changed */ + unsigned int int_type; /* interrupt type for exclusiveness check */ } msi_field_config = { .enable_bit = PCI_MSI_FLAGS_ENABLE, + .allowed_bits = PCI_MSI_FLAGS_ENABLE, .int_type = INTERRUPT_TYPE_MSI, }, msix_field_config = { .enable_bit = PCI_MSIX_FLAGS_ENABLE, + .allowed_bits = PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL, .int_type = INTERRUPT_TYPE_MSIX, }; @@ -229,7 +232,7 @@ static int msi_msix_flags_write(struct pci_dev *dev, int offset, u16 new_value, return 0; if (!dev_data->allow_interrupt_control || - (new_value ^ old_value) & ~field_config->enable_bit) + (new_value ^ old_value) & ~field_config->allowed_bits) return PCIBIOS_SET_FAILED; if (new_value & field_config->enable_bit) { From 4fa717ba2d25ec2a508eb444196c74cc59a15bc1 Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Mon, 14 Nov 2022 19:21:24 +0800 Subject: [PATCH 1066/1477] xen/platform-pci: add missing free_irq() in error path [ Upstream commit c53717e1e3f0d0f9129b2e0dbc6dcc5e0a8132e9 ] free_irq() is missing in case of error in platform_pci_probe(), fix that. 
Signed-off-by: ruanjinjie Reviewed-by: Oleksandr Tyshchenko Link: https://lore.kernel.org/r/20221114112124.1965611-1-ruanjinjie@huawei.com Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/platform-pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 9db557b76511..804d8f4d0e73 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -137,7 +137,7 @@ static int platform_pci_probe(struct pci_dev *pdev, if (ret) { dev_warn(&pdev->dev, "Unable to set the evtchn callback " "err=%d\n", ret); - goto out; + goto irq_out; } } @@ -145,13 +145,16 @@ static int platform_pci_probe(struct pci_dev *pdev, grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); ret = gnttab_setup_auto_xlat_frames(grant_frames); if (ret) - goto out; + goto irq_out; ret = gnttab_init(); if (ret) goto grant_out; return 0; grant_out: gnttab_free_auto_xlat_frames(); +irq_out: + if (!xen_have_vector_callback) + free_irq(pdev->irq, pdev); out: pci_release_region(pdev, 0); mem_out: From 52fb7bcea0c6edc713405ff73a82802006f132e5 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 11 Nov 2022 18:07:52 +0800 Subject: [PATCH 1067/1477] platform/x86: asus-wmi: add missing pci_dev_put() in asus_wmi_set_xusb2pr() [ Upstream commit d0cdd85046b15089df71a50548617ac1025300d0 ] pci_get_device() will increase the reference count for the returned pci_dev. We need to use pci_dev_put() to decrease the reference count before asus_wmi_set_xusb2pr() returns. 
Signed-off-by: Xiongfeng Wang Link: https://lore.kernel.org/r/20221111100752.134311-1-wangxiongfeng2@huawei.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/asus-wmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 39e1a6396e08..db369cf26111 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -1212,6 +1212,8 @@ static void asus_wmi_set_xusb2pr(struct asus_wmi *asus) pci_write_config_dword(xhci_pdev, USB_INTEL_XUSB2PR, cpu_to_le32(ports_available)); + pci_dev_put(xhci_pdev); + pr_info("set USB_INTEL_XUSB2PR old: 0x%04x, new: 0x%04x\n", orig_ports_available, ports_available); } From a5937dae662b4419c6782c1b3bcdf237205928e9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 11 Nov 2022 12:16:39 +0100 Subject: [PATCH 1068/1477] platform/x86: acer-wmi: Enable SW_TABLET_MODE on Switch V 10 (SW5-017) [ Upstream commit 1e817b889c7d8c14e7005258e15fec62edafe03c ] Like the Acer Switch 10 (SW5-012) and Acer Switch 10 (S1003) models the Acer Switch V 10 (SW5-017) supports reporting SW_TABLET_MODE through acer-wmi. Add a DMI quirk for the SW5-017 setting force_caps to ACER_CAP_KBD_DOCK (these devices have no other acer-wmi based functionality). 
Cc: Rudolf Polzer Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221111111639.35730-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/acer-wmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 8e696262215f..ebec49957ed0 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -536,6 +536,15 @@ static const struct dmi_system_id acer_quirks[] __initconst = { }, .driver_data = (void *)ACER_CAP_KBD_DOCK, }, + { + .callback = set_force_caps, + .ident = "Acer Aspire Switch V 10 SW5-017", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SW5-017"), + }, + .driver_data = (void *)ACER_CAP_KBD_DOCK, + }, { .callback = set_force_caps, .ident = "Acer One 10 (S1003)", From 0964b77bab5445307177bc6d969d2b57f744ed33 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 25 Oct 2022 13:39:31 +0900 Subject: [PATCH 1069/1477] zonefs: fix zone report size in __zonefs_io_error() [ Upstream commit 7dd12d65ac646046a3fe0bbf9a4e86f4514207b3 ] When an IO error occurs, the function __zonefs_io_error() is used to issue a zone report to obtain the latest zone information from the device. This function gets a zone report for all zones used as storage for a file, which is always 1 zone except for files representing aggregated conventional zones. The number of zones of a zone report for a file is calculated in __zonefs_io_error() by doing a bit-shift of the inode i_zone_size field, which is equal to or larger than the device zone size. However, this calculation does not take into account that the last zone of a zoned device may be smaller than the zone size reported by bdev_zone_sectors() (which is used to set the bit shift size). As a result, if an error occurs for an IO targeting such last smaller zone, the zone report will ask for 0 zones, leading to an invalid zone report.
Fix this by using the fact that all files require a 1 zone report, except if the inode i_zone_size field indicates a zone size larger than the device zone size. This exception case corresponds to a mount with aggregated conventional zones. A check for this exception is added to the file inode initialization during mount. If an invalid setup is detected, emit an error and fail the mount (check contributed by Johannes Thumshirn). Signed-off-by: Johannes Thumshirn Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- fs/zonefs/super.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index bf5cb6efb8c0..475d23a4f8da 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -440,14 +440,22 @@ static void __zonefs_io_error(struct inode *inode, bool write) struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); unsigned int noio_flag; - unsigned int nr_zones = - zi->i_zone_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + unsigned int nr_zones = 1; struct zonefs_ioerr_data err = { .inode = inode, .write = write, }; int ret; + /* + * The only files that have more than one zone are conventional zone + * files with aggregated conventional zones, for which the inode zone + * size is always larger than the device zone size. 
+ */ + if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev)) + nr_zones = zi->i_zone_size >> + (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + /* * Memory allocations in blkdev_report_zones() can trigger a memory * reclaim which may in turn cause a recursion into zonefs as well as @@ -1364,6 +1372,14 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, zi->i_ztype = type; zi->i_zsector = zone->start; zi->i_zone_size = zone->len << SECTOR_SHIFT; + if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && + !(sbi->s_features & ZONEFS_F_AGGRCNV)) { + zonefs_err(sb, + "zone size %llu doesn't match device's zone sectors %llu\n", + zi->i_zone_size, + bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); + return -EINVAL; + } zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, zone->capacity << SECTOR_SHIFT); @@ -1406,11 +1422,11 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, struct inode *dir = d_inode(parent); struct dentry *dentry; struct inode *inode; - int ret; + int ret = -ENOMEM; dentry = d_alloc_name(parent, name); if (!dentry) - return NULL; + return ERR_PTR(ret); inode = new_inode(parent->d_sb); if (!inode) @@ -1435,7 +1451,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, dput: dput(dentry); - return NULL; + return ERR_PTR(ret); } struct zonefs_zone_data { @@ -1455,7 +1471,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, struct blk_zone *zone, *next, *end; const char *zgroup_name; char *file_name; - struct dentry *dir; + struct dentry *dir, *dent; unsigned int n = 0; int ret; @@ -1473,8 +1489,8 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, zgroup_name = "seq"; dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); - if (!dir) { - ret = -ENOMEM; + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); goto free; } @@ -1520,8 +1536,9 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, * Use the file number within its group as file name. 
*/ snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); - if (!zonefs_create_inode(dir, file_name, zone, type)) { - ret = -ENOMEM; + dent = zonefs_create_inode(dir, file_name, zone, type); + if (IS_ERR(dent)) { + ret = PTR_ERR(dent); goto free; } From 0acc008cf98ebe239e23a60ef028662773bfdeaa Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 14 Nov 2022 15:38:41 +0800 Subject: [PATCH 1070/1477] platform/x86: hp-wmi: Ignore Smart Experience App event [ Upstream commit 8b9b6a044b408283b086702b1d9e3cf4ba45b426 ] Sometimes hp-wmi driver complains on system resume: [ 483.116451] hp_wmi: Unknown event_id - 33 - 0x0 According to HP it's a feature called "HP Smart Experience App" and it's safe to be ignored. Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20221114073842.205392-1-kai.heng.feng@canonical.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/hp-wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 519b2ab84a63..6642d09b17b5 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -63,6 +63,7 @@ enum hp_wmi_event_ids { HPWMI_PEAKSHIFT_PERIOD = 0x0F, HPWMI_BATTERY_CHARGE_PERIOD = 0x10, HPWMI_SANITIZATION_MODE = 0x17, + HPWMI_SMART_EXPERIENCE_APP = 0x21, }; struct bios_args { @@ -632,6 +633,8 @@ static void hp_wmi_notify(u32 value, void *context) break; case HPWMI_SANITIZATION_MODE: break; + case HPWMI_SMART_EXPERIENCE_APP: + break; default: pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break; From 7e8eaa939eea93f961417e33f6d02db41e1f4fb6 Mon Sep 17 00:00:00 2001 From: Gleb Mazovetskiy Date: Mon, 14 Nov 2022 22:56:16 +0000 Subject: [PATCH 1071/1477] tcp: configurable source port perturb table size [ Upstream commit aeac4ec8f46d610a10adbaeff5e2edf6a88ffc62 ] On embedded systems with little memory and no relevant security concerns, it is beneficial to reduce the size of the 
table. Reducing the size from 2^16 to 2^8 saves 255 KiB of kernel RAM. Makes the table size configurable as an expert option. The size was previously increased from 2^8 to 2^16 in commit 4c2c8f03a5ab ("tcp: increase source port perturb table to 2^16"). Signed-off-by: Gleb Mazovetskiy Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/Kconfig | 10 ++++++++++ net/ipv4/inet_hashtables.c | 10 +++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 87983e70f03f..23b06063e1a5 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -403,6 +403,16 @@ config INET_IPCOMP If unsure, say Y. +config INET_TABLE_PERTURB_ORDER + int "INET: Source port perturbation table size (as power of 2)" if EXPERT + default 16 + help + Source port perturbation table size (as power of 2) for + RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm. + + The default is almost always what you want. + Only change this if you know what you are doing. + config INET_XFRM_TUNNEL tristate select INET_TUNNEL diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index c0de655fffd7..c68a1dae25ca 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -721,13 +721,13 @@ EXPORT_SYMBOL_GPL(inet_unhash); * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this * property might be used by clever attacker. + * * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though - * attacks were since demonstrated, thus we use 65536 instead to really - * give more isolation and privacy, at the expense of 256kB of kernel - * memory. + * attacks were since demonstrated, thus we use 65536 by default instead + * to really give more isolation and privacy, at the expense of 256kB + * of kernel memory. 
*/ -#define INET_TABLE_PERTURB_SHIFT 16 -#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) +#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER) static u32 *table_perturb; int __inet_hash_connect(struct inet_timewait_death_row *death_row, From a7f30b5b8d7c56920bfc8436b0fcc624d0454497 Mon Sep 17 00:00:00 2001 From: Enrico Sau Date: Tue, 15 Nov 2022 11:58:59 +0100 Subject: [PATCH 1072/1477] net: usb: qmi_wwan: add Telit 0x103a composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e103ba33998d0f25653cc8ebe745b68d1ee10cda ] Add the following Telit LE910C4-WWX composition: 0x103a: rmnet Signed-off-by: Enrico Sau Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20221115105859.14324-1-enrico.sau@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index a1c9233e264d..7313e6e03c12 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1292,6 +1292,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x103a, 0)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ From 79d9a11679785d7c5b10ce5ee85f43165d3f891f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 20 Oct 2022 15:23:40 +0100 Subject: [PATCH 1073/1477] gpu: host1x: Avoid trying to use GART on Tegra20 [ Upstream commit c2418f911a31a266af4fbaca998dc73d3676475a ] Since commit c7e3ca515e78 ("iommu/tegra: gart: Do not register with bus") quite some time ago, the GART driver has 
effectively disabled itself to avoid issues with the GPU driver expecting it to work in ways that it doesn't. As of commit 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") that bodge no longer works, but really the GPU driver should be responsible for its own behaviour anyway. Make the workaround explicit. Reported-by: Jon Hunter Suggested-by: Dmitry Osipenko Signed-off-by: Robin Murphy Tested-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/drm.c | 4 ++++ drivers/gpu/host1x/dev.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 2c6ebc328b24..318692ad9680 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1042,6 +1042,10 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev) struct host1x *host1x = dev_get_drvdata(dev->dev.parent); struct iommu_domain *domain; + /* Our IOMMU usage policy doesn't currently play well with GART */ + if (of_machine_is_compatible("nvidia,tegra20")) + return false; + /* * If the Tegra DRM clients are backed by an IOMMU, push buffers are * likely to be allocated beyond the 32-bit boundary if sufficient diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 8659558b518d..9f674a8d5009 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -198,6 +198,10 @@ static void host1x_setup_sid_table(struct host1x *host) static bool host1x_wants_iommu(struct host1x *host1x) { + /* Our IOMMU usage policy doesn't currently play well with GART */ + if (of_machine_is_compatible("nvidia,tegra20")) + return false; + /* * If we support addressing a maximum of 32 bits of physical memory * and if the host1x firewall is enabled, there's no need to enable From d306f73079f36f38eaee3163c7275f06bf75b5f4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 15 Nov 2022 12:48:26 -0500 Subject: [PATCH 1074/1477] dm integrity: flush the journal on suspend [ 
Upstream commit 5e5dab5ec763d600fe0a67837dd9155bdc42f961 ] This commit flushes the journal on suspend. It is prerequisite for the next commit that enables activating dm integrity devices in read-only mode. Note that we deliberately didn't flush the journal on suspend, so that the journal replay code would be tested. However, the dm-integrity code is 5 years old now, so that journal replay is well-tested, and we can make this change now. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-integrity.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 835b1f3464d0..847dfd682e20 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2470,10 +2470,6 @@ static void integrity_writer(struct work_struct *w) unsigned prev_free_sectors; - /* the following test is not needed, but it tests the replay code */ - if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev) - return; - spin_lock_irq(&ic->endio_wait.lock); write_start = ic->committed_section; write_sections = ic->n_committed_sections; @@ -2980,8 +2976,7 @@ static void dm_integrity_postsuspend(struct dm_target *ti) drain_workqueue(ic->commit_wq); if (ic->mode == 'J') { - if (ic->meta_dev) - queue_work(ic->writer_wq, &ic->writer_work); + queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); dm_integrity_flush_buffers(ic, true); } From 64b7f9a7ddfbcc7cb14a0421fdc18610f862bdde Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 15 Nov 2022 12:51:50 -0500 Subject: [PATCH 1075/1477] dm integrity: clear the journal on suspend [ Upstream commit 984bf2cc531e778e49298fdf6730e0396166aa21 ] There was a problem that a user burned a dm-integrity image on CDROM and could not activate it because it had a non-empty journal. Fix this problem by flushing the journal (done by the previous commit) and clearing the journal (done by this commit). 
Once the journal is cleared, dm-integrity won't attempt to replay it on the next activation. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-integrity.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 847dfd682e20..2156a2d5ac70 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -254,6 +254,7 @@ struct dm_integrity_c { struct completion crypto_backoff; + bool wrote_to_journal; bool journal_uptodate; bool just_formatted; bool recalculate_flag; @@ -2256,6 +2257,8 @@ static void integrity_commit(struct work_struct *w) if (!commit_sections) goto release_flush_bios; + ic->wrote_to_journal = true; + i = commit_start; for (n = 0; n < commit_sections; n++) { for (j = 0; j < ic->journal_section_entries; j++) { @@ -2979,6 +2982,14 @@ static void dm_integrity_postsuspend(struct dm_target *ti) queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); dm_integrity_flush_buffers(ic, true); + if (ic->wrote_to_journal) { + init_journal(ic, ic->free_section, + ic->journal_sections - ic->free_section, ic->commit_seq); + if (ic->free_section) { + init_journal(ic, 0, ic->free_section, + next_commit_seq(ic->commit_seq)); + } + } } if (ic->mode == 'B') { @@ -3006,6 +3017,8 @@ static void dm_integrity_resume(struct dm_target *ti) DEBUG_print("resume\n"); + ic->wrote_to_journal = false; + if (ic->provided_data_sectors != old_provided_data_sectors) { if (ic->provided_data_sectors > old_provided_data_sectors && ic->mode == 'B' && From 7c6535fb4d67ea37c98a1d1d24ca33dd5ec42693 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:40 -0500 Subject: [PATCH 1076/1477] wifi: wilc1000: validate pairwise and authentication suite offsets commit cd21d99e595ec1d8721e1058dcdd4f1f7de1d793 upstream. There is no validation of 'offset' which can trigger an out-of-bounds read when extracting RSN capabilities. 
Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-2-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/microchip/wilc1000/hif.c | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index d025a3093015..b25847799138 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -467,14 +467,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); if (rsn_ie) { + int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; - param->mode_802_11i = 2; - param->rsn_found = true; /* extract RSN capabilities */ - offset += (rsn_ie[offset] * 4) + 2; - offset += (rsn_ie[offset] * 4) + 2; - memcpy(param->rsn_cap, &rsn_ie[offset], 2); + if (offset < rsn_ie_len) { + /* skip over pairwise suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset < rsn_ie_len) { + /* skip over authentication suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset + 1 < rsn_ie_len) { + param->mode_802_11i = 2; + param->rsn_found = true; + memcpy(param->rsn_cap, &rsn_ie[offset], 2); + } + } + } } if (param->rsn_found) { From 905f886eae4b065656a575e8a02544045cbaadcf Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:41 -0500 Subject: [PATCH 1077/1477] wifi: wilc1000: validate length of IEEE80211_P2P_ATTR_OPER_CHANNEL attribute commit 051ae669e4505abbe05165bebf6be7922de11f41 upstream. Validate that the IEEE80211_P2P_ATTR_OPER_CHANNEL attribute contains enough space for a 'struct wilc_attr_oper_ch'. If the attribute is too small then it triggers an out-of-bounds write later in the function.
Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-3-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 6be5ac8ba518..b42e9eb2631c 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -939,14 +939,24 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) return; while (index + sizeof(*e) <= len) { + u16 attr_size; + e = (struct wilc_attr_entry *)&buf[index]; + attr_size = le16_to_cpu(e->attr_len); + + if (index + sizeof(*e) + attr_size > len) + return; + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) ch_list_idx = index; - else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL) + else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && + attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) op_ch_idx = index; + if (ch_list_idx && op_ch_idx) break; - index += le16_to_cpu(e->attr_len) + sizeof(*e); + + index += sizeof(*e) + attr_size; } if (ch_list_idx) { From 5a068535c0073c8402aa0755e8ef259fb98a33c5 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:42 -0500 Subject: [PATCH 1078/1477] wifi: wilc1000: validate length of IEEE80211_P2P_ATTR_CHANNEL_LIST attribute commit f9b62f9843c7b0afdaecabbcebf1dbba18599408 upstream. Validate that the IEEE80211_P2P_ATTR_CHANNEL_LIST attribute contains enough space for a 'struct wilc_attr_ch_list'. If the attribute is too small then it can trigger an out-of-bounds write later in the function. 'struct wilc_attr_ch_list' is variable sized so also check 'attr_len' does not extend beyond the end of 'buf'.
Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-4-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index b42e9eb2631c..df03d4d71a5c 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -947,7 +947,8 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) if (index + sizeof(*e) + attr_size > len) return; - if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST && + attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e))) ch_list_idx = index; else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) From 3eb6b89a4e9f9e44c3170d70d8d16c3c8dc8c800 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:43 -0500 Subject: [PATCH 1079/1477] wifi: wilc1000: validate number of channels commit 0cdfa9e6f0915e3d243e2393bfa8a22e12d553b0 upstream. There is no validation of 'e->no_of_channels' which can trigger an out-of-bounds write in the following 'memset' call. Validate that the number of channels does not extend beyond the size of the channel list element.
Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-5-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman --- .../wireless/microchip/wilc1000/cfg80211.c | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index df03d4d71a5c..dd26f2086180 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -961,19 +961,30 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) } if (ch_list_idx) { - u16 attr_size; - struct wilc_ch_list_elem *e; - int i; + unsigned int i; + u16 elem_size; ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx]; - attr_size = le16_to_cpu(ch_list->attr_len); - for (i = 0; i < attr_size;) { + /* the number of bytes following the final 'elem' member */ + elem_size = le16_to_cpu(ch_list->attr_len) - + (sizeof(*ch_list) - sizeof(struct wilc_attr_entry)); + for (i = 0; i < elem_size;) { + struct wilc_ch_list_elem *e; + e = (struct wilc_ch_list_elem *)(ch_list->elem + i); + + i += sizeof(*e); + if (i > elem_size) + break; + + i += e->no_of_channels; + if (i > elem_size) + break; + if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) { memset(e->ch_list, sta_ch, e->no_of_channels); break; } - i += e->no_of_channels; } } From f8f80d532f7811a1da04480233dbb4be0256e38d Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 28 Nov 2022 17:08:32 +0000 Subject: [PATCH 1080/1477] genirq/msi: Shutdown managed interrupts with unsatifiable affinities From: Marc Zyngier commit d802057c7c553ad426520a053da9f9fe08e2c35a upstream. [ This commit is almost a rewrite because it conflicts with Thomas Gleixner's refactoring of this code in v5.17-rc1. 
I wasn't sure if I should drop all the s-o-bs (including Mark's), but decided to keep as the original commit ] When booting with maxcpus=, interrupt controllers such as the GICv3 ITS may not be able to satisfy the affinity of some managed interrupts, as some of the HW resources are simply not available. The same thing happens when loading a driver using managed interrupts while CPUs are offline. In order to deal with this, do not try to activate such interrupt if there is no online CPU capable of handling it. Instead, place it in shutdown state. Once a capable CPU shows up, it will be activated. Reported-by: John Garry Reported-by: David Decotigny Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: John Garry Link: https://lore.kernel.org/r/20220405185040.206297-2-maz@kernel.org Signed-off-by: Luiz Capitulino Signed-off-by: Greg Kroah-Hartman --- kernel/irq/msi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index d217acc9f71b..b47d95b68ac1 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -456,6 +456,13 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, irqd_clr_can_reserve(irq_data); if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) irqd_set_msi_nomask_quirk(irq_data); + if ((info->flags & MSI_FLAG_ACTIVATE_EARLY) && + irqd_affinity_is_managed(irq_data) && + !cpumask_intersects(irq_data_get_affinity_mask(irq_data), + cpu_online_mask)) { + irqd_set_managed_shutdown(irq_data); + continue; + } } ret = irq_domain_activate_irq(irq_data, can_reserve); if (ret) From e0d2c59ee9954187737110594220a53007ad0d74 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 28 Nov 2022 17:08:33 +0000 Subject: [PATCH 1081/1477] genirq: Always limit the affinity to online CPUs From: Marc Zyngier commit 33de0aa4bae982ed6f7c777f86b5af3e627ac937 upstream. 
[ Fixed small conflicts due to the HK_FLAG_MANAGED_IRQ flag having been renamed upstream ] When booting with maxcpus= (or even loading a driver while most CPUs are offline), it is pretty easy to observe managed affinities containing a mix of online and offline CPUs being passed to the irqchip driver. This means that the irqchip cannot trust the affinity passed down from the core code, which is a bit annoying and requires (at least in theory) all drivers to implement some sort of affinity narrowing. In order to address this, always limit the cpumask to the set of online CPUs. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220405185040.206297-3-maz@kernel.org Signed-off-by: Luiz Capitulino Signed-off-by: Greg Kroah-Hartman --- kernel/irq/manage.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 92d94615cbbb..715d4ed0fe88 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -223,11 +223,16 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, { struct irq_desc *desc = irq_data_to_desc(data); struct irq_chip *chip = irq_data_get_irq_chip(data); + const struct cpumask *prog_mask; int ret; + static DEFINE_RAW_SPINLOCK(tmp_mask_lock); + static struct cpumask tmp_mask; + if (!chip || !chip->irq_set_affinity) return -EINVAL; + raw_spin_lock(&tmp_mask_lock); /* * If this is a managed interrupt and housekeeping is enabled on * it check whether the requested affinity mask intersects with @@ -249,24 +254,28 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, */ if (irqd_affinity_is_managed(data) && housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) { - const struct cpumask *hk_mask, *prog_mask; - - static DEFINE_RAW_SPINLOCK(tmp_mask_lock); - static struct cpumask tmp_mask; + const struct cpumask *hk_mask; hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ); - raw_spin_lock(&tmp_mask_lock);
cpumask_and(&tmp_mask, mask, hk_mask); if (!cpumask_intersects(&tmp_mask, cpu_online_mask)) prog_mask = mask; else prog_mask = &tmp_mask; - ret = chip->irq_set_affinity(data, prog_mask, force); - raw_spin_unlock(&tmp_mask_lock); } else { - ret = chip->irq_set_affinity(data, mask, force); + prog_mask = mask; } + + /* Make sure we only provide online CPUs to the irqchip */ + cpumask_and(&tmp_mask, prog_mask, cpu_online_mask); + if (!cpumask_empty(&tmp_mask)) + ret = chip->irq_set_affinity(data, &tmp_mask, force); + else + ret = -EINVAL; + + raw_spin_unlock(&tmp_mask_lock); + switch (ret) { case IRQ_SET_MASK_OK: case IRQ_SET_MASK_OK_DONE: From 9d90a2b98e6e5f793cd0c434f28bb98a1eb22fe0 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 28 Nov 2022 17:08:34 +0000 Subject: [PATCH 1082/1477] irqchip/gic-v3: Always trust the managed affinity provided by the core code From: Marc Zyngier commit 3f893a5962d31c0164efdbf6174ed0784f1d7603 upstream. Now that the core code has been fixed to always give us an affinity that only includes online CPUs, directly use this affinity when computing a target CPU. 
Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220405185040.206297-4-maz@kernel.org Signed-off-by: Luiz Capitulino Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 42b295337baf..d8cb5bcd6b10 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1615,7 +1615,7 @@ static int its_select_cpu(struct irq_data *d, cpu = cpumask_pick_least_loaded(d, tmpmask); } else { - cpumask_and(tmpmask, irq_data_get_affinity_mask(d), cpu_online_mask); + cpumask_copy(tmpmask, aff_mask); /* If we cannot cross sockets, limit the search to that node */ if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) && From b56d6e55857bb7dff8f600479df4f11db114417b Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 28 Nov 2022 17:08:35 +0000 Subject: [PATCH 1083/1477] genirq: Take the proposed affinity at face value if force==true From: Marc Zyngier commit c48c8b829d2b966a6649827426bcdba082ccf922 upstream. Although setting the affinity of an interrupt to a set of CPUs that doesn't have any online CPU is generally frowned upon, there are a few limited cases where such affinity is set from a CPUHP notifier, setting the affinity to a CPU that isn't online yet. The saving grace is that this is always done using the 'force' attribute, which gives a hint that the affinity setting can be outside of the online CPU mask and the callsite sets this flag with the knowledge that the underlying interrupt controller knows to handle it. This restores the expected behaviour on Marek's system.
Fixes: 33de0aa4bae9 ("genirq: Always limit the affinity to online CPUs") Reported-by: Marek Szyprowski Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/4b7fc13c-887b-a664-26e8-45aed13f048a@samsung.com Link: https://lore.kernel.org/r/20220414140011.541725-1-maz@kernel.org Signed-off-by: Luiz Capitulino Signed-off-by: Greg Kroah-Hartman --- kernel/irq/manage.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 715d4ed0fe88..3cb29835632f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -267,10 +267,16 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, prog_mask = mask; } - /* Make sure we only provide online CPUs to the irqchip */ + /* + * Make sure we only provide online CPUs to the irqchip, + * unless we are being asked to force the affinity (in which + * case we do as we are told). + */ cpumask_and(&tmp_mask, prog_mask, cpu_online_mask); - if (!cpumask_empty(&tmp_mask)) + if (!force && !cpumask_empty(&tmp_mask)) ret = chip->irq_set_affinity(data, &tmp_mask, force); + else if (force) + ret = chip->irq_set_affinity(data, mask, force); else ret = -EINVAL; From 24a37ba2cb6616494f9af585a8d5d251c85c0c38 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 7 Nov 2022 11:44:51 -0500 Subject: [PATCH 1084/1477] btrfs: free btrfs_path before copying root refs to userspace commit b740d806166979488e798e41743aaec051f2443f upstream. 
Syzbot reported the following lockdep splat ====================================================== WARNING: possible circular locking dependency detected 6.0.0-rc7-syzkaller-18095-gbbed346d5a96 #0 Not tainted ------------------------------------------------------ syz-executor307/3029 is trying to acquire lock: ffff0000c02525d8 (&mm->mmap_lock){++++}-{3:3}, at: __might_fault+0x54/0xb4 mm/memory.c:5576 but task is already holding lock: ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock fs/btrfs/locking.c:134 [inline] ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_tree_read_lock fs/btrfs/locking.c:140 [inline] ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_read_lock_root_node+0x13c/0x1c0 fs/btrfs/locking.c:279 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (btrfs-root-00){++++}-{3:3}: down_read_nested+0x64/0x84 kernel/locking/rwsem.c:1624 __btrfs_tree_read_lock fs/btrfs/locking.c:134 [inline] btrfs_tree_read_lock fs/btrfs/locking.c:140 [inline] btrfs_read_lock_root_node+0x13c/0x1c0 fs/btrfs/locking.c:279 btrfs_search_slot_get_root+0x74/0x338 fs/btrfs/ctree.c:1637 btrfs_search_slot+0x1b0/0xfd8 fs/btrfs/ctree.c:1944 btrfs_update_root+0x6c/0x5a0 fs/btrfs/root-tree.c:132 commit_fs_roots+0x1f0/0x33c fs/btrfs/transaction.c:1459 btrfs_commit_transaction+0x89c/0x12d8 fs/btrfs/transaction.c:2343 flush_space+0x66c/0x738 fs/btrfs/space-info.c:786 btrfs_async_reclaim_metadata_space+0x43c/0x4e0 fs/btrfs/space-info.c:1059 process_one_work+0x2d8/0x504 kernel/workqueue.c:2289 worker_thread+0x340/0x610 kernel/workqueue.c:2436 kthread+0x12c/0x158 kernel/kthread.c:376 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:860 -> #2 (&fs_info->reloc_mutex){+.+.}-{3:3}: __mutex_lock_common+0xd4/0xca8 kernel/locking/mutex.c:603 __mutex_lock kernel/locking/mutex.c:747 [inline] mutex_lock_nested+0x38/0x44 kernel/locking/mutex.c:799 btrfs_record_root_in_trans fs/btrfs/transaction.c:516 [inline] 
start_transaction+0x248/0x944 fs/btrfs/transaction.c:752 btrfs_start_transaction+0x34/0x44 fs/btrfs/transaction.c:781 btrfs_create_common+0xf0/0x1b4 fs/btrfs/inode.c:6651 btrfs_create+0x8c/0xb0 fs/btrfs/inode.c:6697 lookup_open fs/namei.c:3413 [inline] open_last_lookups fs/namei.c:3481 [inline] path_openat+0x804/0x11c4 fs/namei.c:3688 do_filp_open+0xdc/0x1b8 fs/namei.c:3718 do_sys_openat2+0xb8/0x22c fs/open.c:1313 do_sys_open fs/open.c:1329 [inline] __do_sys_openat fs/open.c:1345 [inline] __se_sys_openat fs/open.c:1340 [inline] __arm64_sys_openat+0xb0/0xe0 fs/open.c:1340 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 -> #1 (sb_internal#2){.+.+}-{0:0}: percpu_down_read include/linux/percpu-rwsem.h:51 [inline] __sb_start_write include/linux/fs.h:1826 [inline] sb_start_intwrite include/linux/fs.h:1948 [inline] start_transaction+0x360/0x944 fs/btrfs/transaction.c:683 btrfs_join_transaction+0x30/0x40 fs/btrfs/transaction.c:795 btrfs_dirty_inode+0x50/0x140 fs/btrfs/inode.c:6103 btrfs_update_time+0x1c0/0x1e8 fs/btrfs/inode.c:6145 inode_update_time fs/inode.c:1872 [inline] touch_atime+0x1f0/0x4a8 fs/inode.c:1945 file_accessed include/linux/fs.h:2516 [inline] btrfs_file_mmap+0x50/0x88 fs/btrfs/file.c:2407 call_mmap include/linux/fs.h:2192 [inline] mmap_region+0x7fc/0xc14 mm/mmap.c:1752 do_mmap+0x644/0x97c mm/mmap.c:1540 vm_mmap_pgoff+0xe8/0x1d0 mm/util.c:552 ksys_mmap_pgoff+0x1cc/0x278 mm/mmap.c:1586 __do_sys_mmap arch/arm64/kernel/sys.c:28 [inline] __se_sys_mmap arch/arm64/kernel/sys.c:21 [inline] __arm64_sys_mmap+0x58/0x6c arch/arm64/kernel/sys.c:21 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall 
arch/arm64/kernel/syscall.c:52 [inline] el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 -> #0 (&mm->mmap_lock){++++}-{3:3}: check_prev_add kernel/locking/lockdep.c:3095 [inline] check_prevs_add kernel/locking/lockdep.c:3214 [inline] validate_chain kernel/locking/lockdep.c:3829 [inline] __lock_acquire+0x1530/0x30a4 kernel/locking/lockdep.c:5053 lock_acquire+0x100/0x1f8 kernel/locking/lockdep.c:5666 __might_fault+0x7c/0xb4 mm/memory.c:5577 _copy_to_user include/linux/uaccess.h:134 [inline] copy_to_user include/linux/uaccess.h:160 [inline] btrfs_ioctl_get_subvol_rootref+0x3a8/0x4bc fs/btrfs/ioctl.c:3203 btrfs_ioctl+0xa08/0xa64 fs/btrfs/ioctl.c:5556 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __arm64_sys_ioctl+0xd0/0x140 fs/ioctl.c:856 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 other info that might help us debug this: Chain exists of: &mm->mmap_lock --> &fs_info->reloc_mutex --> btrfs-root-00 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(btrfs-root-00); lock(&fs_info->reloc_mutex); lock(btrfs-root-00); lock(&mm->mmap_lock); *** DEADLOCK *** 1 lock held by syz-executor307/3029: #0: ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock fs/btrfs/locking.c:134 [inline] #0: ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_tree_read_lock fs/btrfs/locking.c:140 [inline] #0: 
ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_read_lock_root_node+0x13c/0x1c0 fs/btrfs/locking.c:279 stack backtrace: CPU: 0 PID: 3029 Comm: syz-executor307 Not tainted 6.0.0-rc7-syzkaller-18095-gbbed346d5a96 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/30/2022 Call trace: dump_backtrace+0x1c4/0x1f0 arch/arm64/kernel/stacktrace.c:156 show_stack+0x2c/0x54 arch/arm64/kernel/stacktrace.c:163 __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x104/0x16c lib/dump_stack.c:106 dump_stack+0x1c/0x58 lib/dump_stack.c:113 print_circular_bug+0x2c4/0x2c8 kernel/locking/lockdep.c:2053 check_noncircular+0x14c/0x154 kernel/locking/lockdep.c:2175 check_prev_add kernel/locking/lockdep.c:3095 [inline] check_prevs_add kernel/locking/lockdep.c:3214 [inline] validate_chain kernel/locking/lockdep.c:3829 [inline] __lock_acquire+0x1530/0x30a4 kernel/locking/lockdep.c:5053 lock_acquire+0x100/0x1f8 kernel/locking/lockdep.c:5666 __might_fault+0x7c/0xb4 mm/memory.c:5577 _copy_to_user include/linux/uaccess.h:134 [inline] copy_to_user include/linux/uaccess.h:160 [inline] btrfs_ioctl_get_subvol_rootref+0x3a8/0x4bc fs/btrfs/ioctl.c:3203 btrfs_ioctl+0xa08/0xa64 fs/btrfs/ioctl.c:5556 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __arm64_sys_ioctl+0xd0/0x140 fs/ioctl.c:856 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 We do generally the right thing here, copying the references into a temporary buffer, however we are still holding the path when we do copy_to_user from the temporary buffer. 
Fix this by freeing the path before we copy to user space. Reported-by: syzbot+4ef9e52e464c6ff47d9d@syzkaller.appspotmail.com CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b5e9bfe884c4..40760d8e1912 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2903,6 +2903,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp) } out: + btrfs_free_path(path); + if (!ret || ret == -EOVERFLOW) { rootrefs->num_items = found; /* update min_treeid for next search */ @@ -2914,7 +2916,6 @@ out: } kfree(rootrefs); - btrfs_free_path(path); return ret; } From 0bdb8f7ef87d534b507142185cd03e02f5e4f0e8 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 10 Nov 2022 11:36:29 +0530 Subject: [PATCH 1085/1477] btrfs: free btrfs_path before copying fspath to userspace commit 8cf96b409d9b3946ece58ced13f92d0f775b0442 upstream. btrfs_ioctl_ino_to_path() frees the search path after the userspace copy from the temp buffer @ipath->fspath, which can potentially lead to a lock splat warning. Fix this by freeing the path before we copy it to userspace.
CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 40760d8e1912..a24f937ba49b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3879,6 +3879,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) ipath->fspath->val[i] = rel_ptr; } + btrfs_free_path(path); + path = NULL; ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, ipath->fspath, size); if (ret) { From 1581830c0eca75c5e77bc8671ab8ba9cc2e00de7 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 10 Nov 2022 11:36:31 +0530 Subject: [PATCH 1086/1477] btrfs: free btrfs_path before copying subvol info to userspace commit 013c1c5585ebcfb19c88efe79063d0463b1b6159 upstream. btrfs_ioctl_get_subvol_info() frees the search path after the userspace copy from the temp buffer @subvol_info. This can lead to a lock splat warning. Fix this by freeing the path before we copy it to userspace. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a24f937ba49b..d0c31651ec80 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2811,6 +2811,8 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp) } } + btrfs_free_path(path); + path = NULL; if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) ret = -EFAULT; From c86c1a7037cd271dbbb315c3add2dcb589a5a2c8 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 22 Nov 2022 19:50:02 +0800 Subject: [PATCH 1087/1477] btrfs: sysfs: normalize the error handling branch in btrfs_init_sysfs() commit ffdbb44f2f23f963b8f5672e35c3a26088177a62 upstream. 
Although kset_unregister() can eventually remove all attribute files, explicitly rolling back with the matching function makes the code logic look clearer. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Qu Wenruo Signed-off-by: Zhen Lei Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 3bb6b688ece5..ecf190286377 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1767,8 +1767,11 @@ int __init btrfs_init_sysfs(void) #ifdef CONFIG_BTRFS_DEBUG ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group); - if (ret) - goto out2; + if (ret) { + sysfs_unmerge_group(&btrfs_kset->kobj, + &btrfs_static_feature_attr_group); + goto out_remove_group; + } #endif return 0; From 596b7d55d7c65984cb5e8f4b97b899e1df22a2d5 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 14 Nov 2022 17:20:45 -0500 Subject: [PATCH 1088/1477] drm/amd/dc/dce120: Fix audio register mapping, stop triggering KASAN commit 44035ec2fde1114254ee465f9ba3bb246b0b6283 upstream. There's been a very long running bug that seems to have been neglected for a while, where amdgpu consistently triggers a KASAN error at start: BUG: KASAN: global-out-of-bounds in read_indirect_azalia_reg+0x1d4/0x2a0 [amdgpu] Read of size 4 at addr ffffffffc2274b28 by task modprobe/1889 After digging through amd's rather creative method for accessing registers, I eventually discovered the problem likely has to do with the fact that on my dce120 GPU there are supposedly 7 sets of audio registers. But we only define a register mapping for 6 sets. So, fix this and fix the KASAN warning finally. 
Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 8f362e8c1787..be6d43c9979c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -361,7 +361,8 @@ static const struct dce_audio_registers audio_regs[] = { audio_regs(2), audio_regs(3), audio_regs(4), - audio_regs(5) + audio_regs(5), + audio_regs(6), }; #define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ From feb97cf45e77ec7dfec56a3e1f0a1c8763f286ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 9 Nov 2022 12:14:44 +0100 Subject: [PATCH 1089/1477] drm/amdgpu: always register an MMU notifier for userptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b39df63b16b64a3af42695acb9bc567aad144776 upstream. Since switching to HMM we always need that because we no longer grab references to the pages. 
Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Felix Kuehling CC: stable@vger.kernel.org Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index e8c76bd8c501..6aa9fd9cb83b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -341,11 +341,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto release_object; - if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) { - r = amdgpu_mn_register(bo, args->addr); - if (r) - goto release_object; - } + r = amdgpu_mn_register(bo, args->addr); + if (r) + goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); From 86f0082fb9470904b15546726417f28077088fee Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 14 Nov 2022 11:38:24 +0100 Subject: [PATCH 1090/1477] drm/i915: fix TLB invalidation for Gen12 video and compute engines commit 04aa64375f48a5d430b5550d9271f8428883e550 upstream. In case of Gen12 video and compute engines, TLB_INV registers are masked - to modify one bit, corresponding bit in upper half of the register must be enabled, otherwise nothing happens. 
CVE: CVE-2022-4139 Suggested-by: Chris Wilson Signed-off-by: Andrzej Hajda Acked-by: Daniel Vetter Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_gt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index a33887f2464f..5f86d9aacb8a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -745,6 +745,10 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; + if (INTEL_GEN(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS)) + rb.bit = _MASKED_BIT_ENABLE(rb.bit); + intel_uncore_write_fw(uncore, rb.reg, rb.bit); } From 4801672fb076d546c67504ec9810a3a467e91768 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 23 Nov 2022 09:10:42 +0100 Subject: [PATCH 1091/1477] fuse: lock inode unconditionally in fuse_fallocate() commit 44361e8cf9ddb23f17bdcc40ca944abf32e83e79 upstream. file_modified() must be called with inode lock held. fuse_fallocate() didn't lock the inode in case of just FALLOC_KEEP_SIZE flags value, which resulted in a kernel Warning in notify_change(). Lock the inode unconditionally, like all other fallocate implementations do. 
Reported-by: Pengfei Xu Reported-and-tested-by: syzbot+462da39f0667b357c4b6@syzkaller.appspotmail.com Fixes: 4a6f278d4827 ("fuse: add file_modified() to fallocate") Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 253308fcb047..504389568dac 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3275,10 +3275,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, .mode = mode }; int err; - bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || - (mode & FALLOC_FL_PUNCH_HOLE); - - bool block_faults = FUSE_IS_DAX(inode) && lock_inode; + bool block_faults = FUSE_IS_DAX(inode) && + (!(mode & FALLOC_FL_KEEP_SIZE) || + (mode & FALLOC_FL_PUNCH_HOLE)); if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; @@ -3286,22 +3285,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (fm->fc->no_fallocate) return -EOPNOTSUPP; - if (lock_inode) { - inode_lock(inode); - if (block_faults) { - down_write(&fi->i_mmap_sem); - err = fuse_dax_break_layouts(inode, 0, 0); - if (err) - goto out; - } + inode_lock(inode); + if (block_faults) { + down_write(&fi->i_mmap_sem); + err = fuse_dax_break_layouts(inode, 0, 0); + if (err) + goto out; + } - if (mode & FALLOC_FL_PUNCH_HOLE) { - loff_t endbyte = offset + length - 1; + if (mode & FALLOC_FL_PUNCH_HOLE) { + loff_t endbyte = offset + length - 1; - err = fuse_writeback_range(inode, offset, endbyte); - if (err) - goto out; - } + err = fuse_writeback_range(inode, offset, endbyte); + if (err) + goto out; } if (!(mode & FALLOC_FL_KEEP_SIZE) && @@ -3351,8 +3348,7 @@ out: if (block_faults) up_write(&fi->i_mmap_sem); - if (lock_inode) - inode_unlock(inode); + inode_unlock(inode); fuse_flush_time_update(inode); From f4245f05389c29c0d556fea359b2fcfd8dce7bdb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman 
Date: Fri, 2 Dec 2022 17:40:07 +0100 Subject: [PATCH 1092/1477] Linux 5.10.157 Link: https://lore.kernel.org/r/20221130180528.466039523@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Florian Fainelli Tested-by: Salvatore Bonaccorso Tested-by: Shuah Khan Tested-by: Sudip Mukherjee Tested-by: Rudi Heitbaum Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Tested-by: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 166f87bdc190..bf22df29c4d8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 156 +SUBLEVEL = 157 EXTRAVERSION = NAME = Dare mighty things From 37d3df60cb6a10295b062a393b9100bedfd589ca Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 4 Dec 2022 12:09:47 +0000 Subject: [PATCH 1093/1477] ANDROID: CRC ABI fixups in ip.h and ipv6.h In commit 6e9334436d78 ("net: use struct_group to copy ip/ipv6 header addresses"), struct_group() is added to some structures to resolve a build warning. This changes the CRC of a number of networking functions, without changing any actual structure sizes or interactions. To resolve this, use __GENKSYMS__ #ifdef hack to preserve the Android kernel CRC abi. 
Bug: 161946584 Fixes: 6e9334436d78 ("net: use struct_group to copy ip/ipv6 header addresses") Signed-off-by: Greg Kroah-Hartman Change-Id: Ia70eb3aa41316fbced713c99c85ddaeccec2ffec --- include/uapi/linux/ip.h | 4 ++++ include/uapi/linux/ipv6.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index d2f143393780..64beb32a2102 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -100,10 +100,14 @@ struct iphdr { __u8 ttl; __u8 protocol; __sum16 check; +#ifndef __GENKSYMS__ __struct_group(/* no tag */, addrs, /* no attrs */, +#endif __be32 saddr; __be32 daddr; +#ifndef __GENKSYMS__ ); +#endif /*The options start here. */ }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 766ab5c8ee65..bfe9542953af 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -130,10 +130,14 @@ struct ipv6hdr { __u8 nexthdr; __u8 hop_limit; +#ifndef __GENKSYMS__ __struct_group(/* no tag */, addrs, /* no attrs */, +#endif struct in6_addr saddr; struct in6_addr daddr; +#ifndef __GENKSYMS__ ); +#endif }; From f3226d86f8ce0e172366c5ae7d3eb7a81905fd63 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 6 Dec 2022 19:05:25 +0000 Subject: [PATCH 1094/1477] Revert "xfrm: fix "disable_policy" on ipv4 early demux" This reverts commit 497653f6d2392c8d4abb041f01e1a19781d995d0 which is commit 3a5913183aa1b14148c723bda030e6102ad73008 upstream. It is not needed here as the commit it fixes, e6175a2ed1f1 ("xfrm: fix "disable_policy" flag use when arriving from different devices"), was already reverted from this branch due to ABI issues. 
Bug: 161946584 Change-Id: I9bcc08425bf73b6fd1dcad8d801ffc728e1021b6 Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_input.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index eccd7897e7aa..f6b3237e88ca 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -361,11 +361,6 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, iph->tos, dev); if (unlikely(err)) goto drop_error; - } else { - struct in_device *in_dev = __in_dev_get_rcu(dev); - - if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY)) - IPCB(skb)->flags |= IPSKB_NOPOLICY; } #ifdef CONFIG_IP_ROUTE_CLASSID From d5b7a34379faf42cf8ef63fb520f05e5d3d218d4 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 6 Jun 2022 19:32:59 +0200 Subject: [PATCH 1095/1477] btrfs: sink iterator parameter to btrfs_ioctl_logical_to_ino [ Upstream commit e3059ec06b9f1a96826cc2bb6ed131aac0942446 ] There's only one function we pass to iterate_inodes_from_logical as iterator, so we can drop the indirection and call it directly, after moving the function to backref.c Signed-off-by: David Sterba Stable-dep-of: 418ffb9e3cf6 ("btrfs: free btrfs_path before copying inodes to userspace") Signed-off-by: Sasha Levin --- fs/btrfs/backref.c | 25 ++++++++++++++++++++++--- fs/btrfs/backref.h | 3 +-- fs/btrfs/ioctl.c | 22 +--------------------- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6942707f8b03..7208ba22e734 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2060,10 +2060,29 @@ out: return ret; } +static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) +{ + struct btrfs_data_container *inodes = ctx; + const size_t c = 3 * sizeof(u64); + + if (inodes->bytes_left >= c) { + inodes->bytes_left -= c; + inodes->val[inodes->elem_cnt] = inum; + inodes->val[inodes->elem_cnt + 1] = offset; + inodes->val[inodes->elem_cnt + 2] = root; + inodes->elem_cnt += 3; + } else { + inodes->bytes_missing += 
c - inodes->bytes_left; + inodes->bytes_left = 0; + inodes->elem_missed += 3; + } + + return 0; +} + int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, struct btrfs_path *path, - iterate_extent_inodes_t *iterate, void *ctx, - bool ignore_offset) + void *ctx, bool ignore_offset) { int ret; u64 extent_item_pos; @@ -2081,7 +2100,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, extent_item_pos = logical - found_key.objectid; ret = iterate_extent_inodes(fs_info, found_key.objectid, extent_item_pos, search_commit_root, - iterate, ctx, ignore_offset); + build_ino_list, ctx, ignore_offset); return ret; } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 17abde7f794c..6ed18b807b64 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -35,8 +35,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, bool ignore_offset); int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, - struct btrfs_path *path, - iterate_extent_inodes_t *iterate, void *ctx, + struct btrfs_path *path, void *ctx, bool ignore_offset); int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d0c31651ec80..58fe58b929d2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3898,26 +3898,6 @@ out: return ret; } -static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) -{ - struct btrfs_data_container *inodes = ctx; - const size_t c = 3 * sizeof(u64); - - if (inodes->bytes_left >= c) { - inodes->bytes_left -= c; - inodes->val[inodes->elem_cnt] = inum; - inodes->val[inodes->elem_cnt + 1] = offset; - inodes->val[inodes->elem_cnt + 2] = root; - inodes->elem_cnt += 3; - } else { - inodes->bytes_missing += c - inodes->bytes_left; - inodes->bytes_left = 0; - inodes->elem_missed += 3; - } - - return 0; -} - static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, void __user *arg, int version) { @@ -3967,7 +3947,7 @@ static long 
btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, } ret = iterate_inodes_from_logical(loi->logical, fs_info, path, - build_ino_list, inodes, ignore_offset); + inodes, ignore_offset); if (ret == -EINVAL) ret = -ENOENT; if (ret < 0) From 7b020665d4824a3ed7c1296ee1ea40d913bce3a9 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 10 Nov 2022 11:36:28 +0530 Subject: [PATCH 1096/1477] btrfs: free btrfs_path before copying inodes to userspace [ Upstream commit 418ffb9e3cf6c4e2574d3a732b724916684bd133 ] btrfs_ioctl_logical_to_ino() frees the search path after the userspace copy from the temp buffer @inodes, which can potentially lead to a lock splat. Fix this by freeing the path before we copy @inodes to userspace. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ioctl.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 58fe58b929d2..a17076a05c4d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3933,21 +3933,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, size = min_t(u32, loi->size, SZ_16M); } + inodes = init_data_container(size); + if (IS_ERR(inodes)) { + ret = PTR_ERR(inodes); + goto out_loi; + } + path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } - - inodes = init_data_container(size); - if (IS_ERR(inodes)) { - ret = PTR_ERR(inodes); - inodes = NULL; - goto out; - } - ret = iterate_inodes_from_logical(loi->logical, fs_info, path, inodes, ignore_offset); + btrfs_free_path(path); if (ret == -EINVAL) ret = -ENOENT; if (ret < 0) @@ -3959,7 +3958,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, ret = -EFAULT; out: - btrfs_free_path(path); kvfree(inodes); out_loi: kfree(loi); From 6050872f9f315f1fd09a0cb25a7abf482036357c Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Tue, 15 Nov 2022 19:10:00 +0100
Subject: [PATCH 1097/1477] spi: spi-imx: Fix spi_bus_clk if requested clock is higher than input clock [ Upstream commit db2d2dc9a0b58c6faefb6b002fdbed4f0362d1a4 ] In case the requested bus clock is higher than the input clock, the correct dividers (pre = 0, post = 0) are returned from mx51_ecspi_clkdiv(), but *fres is left uninitialized and therefore contains an arbitrary value. This causes trouble for the recently introduced PIO polling feature as the value in spi_imx->spi_bus_clk is used there to calculate for which transfers to enable PIO polling. Fix this by setting *fres even if no clock dividers are in use. This issue was observed on Kontron BL i.MX8MM with an SPI peripheral clock set to 50 MHz by default and a requested SPI bus clock of 80 MHz for the SPI NOR flash. With the fix applied the debug message from mx51_ecspi_clkdiv() now prints the following: spi_imx 30820000.spi: mx51_ecspi_clkdiv: fin: 50000000, fspi: 50000000, post: 0, pre: 0 Fixes: 6fd8b8503a0d ("spi: spi-imx: Fix out-of-order CS/SCLK operation at low speeds") Fixes: 07e759387788 ("spi: spi-imx: add PIO polling support") Cc: Marc Kleine-Budde Cc: David Jander Cc: Fabio Estevam Cc: Mark Brown Cc: Marek Vasut Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Tested-by: Fabio Estevam Acked-by: Marek Vasut Link: https://lore.kernel.org/r/20221115181002.2068270-1-frieder@fris.de Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-imx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 0e3bc0b0a526..74b3b6ca15ef 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -434,8 +434,7 @@ static unsigned int mx51_ecspi_clkdiv(struct spi_imx_data *spi_imx, unsigned int pre, post; unsigned int fin = spi_imx->spi_clk; - if (unlikely(fspi > fin)) - return 0; + fspi = min(fspi, fin); post = fls(fin) - fls(fspi); if (fin > fspi << post) From 1faf21bdd111c0c6e7b4dc6b9fc353870a140a9b Mon Sep 17 
00:00:00 2001 From: Nikolay Borisov Date: Thu, 13 Jan 2022 17:16:18 +0200 Subject: [PATCH 1098/1477] btrfs: move QUOTA_ENABLED check to rescan_should_stop from btrfs_qgroup_rescan_worker [ Upstream commit db5df254120004471e1c957957ab2f1e612dcbd6 ] Instead of having 2 places that short circuit the qgroup leaf scan have everything in the qgroup_rescan_leaf function. In addition to that, also ensure that the inconsistent qgroup flag is set when rescan_should_stop returns true. This both retains the old behavior when -EINTR was set in the body of the loop and at the same time also extends this behavior when scanning is interrupted due to remount or unmount operations. Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba Stable-dep-of: f7e942b5bb35 ("btrfs: qgroup: fix sleep from invalid context bug in btrfs_qgroup_inherit()") Signed-off-by: Sasha Levin --- fs/btrfs/qgroup.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 36da77534076..81bbb7532eb9 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3290,7 +3290,8 @@ out: static bool rescan_should_stop(struct btrfs_fs_info *fs_info) { return btrfs_fs_closing(fs_info) || - test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); + test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) || + !test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); } static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) @@ -3320,11 +3321,9 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) err = PTR_ERR(trans); break; } - if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { - err = -EINTR; - } else { - err = qgroup_rescan_leaf(trans, path); - } + + err = qgroup_rescan_leaf(trans, path); + if (err > 0) btrfs_commit_transaction(trans); else @@ -3338,7 +3337,7 @@ out: if (err > 0 && fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - } else if (err < 
0) { + } else if (err < 0 || stopped) { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; } mutex_unlock(&fs_info->qgroup_rescan_lock); From 44b204730bf32131f064ebc90145092590c7fe95 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 14 Nov 2022 17:17:53 -0500 Subject: [PATCH 1099/1477] drm/display/dp_mst: Fix drm_dp_mst_add_affected_dsc_crtcs() return code [ Upstream commit 2f3a1273862cb82cca227630cc7f04ce0c94b6bb ] Looks like that we're accidentally dropping a pretty important return code here. For some reason, we just return -EINVAL if we fail to get the MST topology state. This is wrong: error codes are important and should never be squashed without being handled, which here seems to have the potential to cause a deadlock. Signed-off-by: Lyude Paul Reviewed-by: Wayne Lin Fixes: 8ec046716ca8 ("drm/dp_mst: Add helper to trigger modeset on affected DSC MST CRTCs") Cc: # v5.6+ Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 4272cd3622f8..0feeac52e4eb 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -5238,7 +5238,7 @@ int drm_dp_mst_add_affected_dsc_crtcs(struct drm_atomic_state *state, struct drm mst_state = drm_atomic_get_mst_topology_state(state, mgr); if (IS_ERR(mst_state)) - return -EINVAL; + return PTR_ERR(mst_state); list_for_each_entry(pos, &mst_state->vcpis, next) { From 00570fafc2bc40aa63894ab054342233439c1d12 Mon Sep 17 00:00:00 2001 From: Claudio Suarez Date: Sun, 17 Oct 2021 13:34:58 +0200 Subject: [PATCH 1100/1477] drm/amdgpu: update drm_display_info correctly when the edid is read [ Upstream commit 20543be93ca45968f344261c1a997177e51bd7e1 ] drm_display_info is updated by drm_get_edid() or drm_connector_update_edid_property(). 
In the amdgpu driver it is almost always updated when the edid is read in amdgpu_connector_get_edid(), but not always. Change amdgpu_connector_get_edid() and amdgpu_connector_free_edid() to keep drm_display_info updated. Reviewed-by: Harry Wentland Signed-off-by: Claudio Suarez Signed-off-by: Alex Deucher Stable-dep-of: 602ad43c3cd8 ("drm/amdgpu: Partially revert "drm/amdgpu: update drm_display_info correctly when the edid is read"") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 5 ++++- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 98d3661336a4..b352c4eb5bbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -315,8 +315,10 @@ static void amdgpu_connector_get_edid(struct drm_connector *connector) if (!amdgpu_connector->edid) { /* some laptops provide a hardcoded edid in rom for LCDs */ if (((connector->connector_type == DRM_MODE_CONNECTOR_LVDS) || - (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) + (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) { amdgpu_connector->edid = amdgpu_connector_get_hardcoded_edid(adev); + drm_connector_update_edid_property(connector, amdgpu_connector->edid); + } } } @@ -326,6 +328,7 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector) kfree(amdgpu_connector->edid); amdgpu_connector->edid = NULL; + drm_connector_update_edid_property(connector, NULL); } static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 55ecc67592eb..167a1ee518a8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2348,13 +2348,12 @@ void 
amdgpu_dm_update_connector_after_detect( aconnector->edid = (struct edid *)sink->dc_edid.raw_edid; - drm_connector_update_edid_property(connector, - aconnector->edid); if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, aconnector->edid); } + drm_connector_update_edid_property(connector, aconnector->edid); amdgpu_dm_update_freesync_caps(connector, aconnector->edid); update_connector_ext_caps(aconnector); } else { From d3f5be82466948405574c6248e5d6f748b7088db Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Nov 2022 12:34:14 -0500 Subject: [PATCH 1101/1477] drm/amdgpu: Partially revert "drm/amdgpu: update drm_display_info correctly when the edid is read" [ Upstream commit 602ad43c3cd8f15cbb25ce9bb494129edb2024ed ] This partially reverts 20543be93ca45968f344261c1a997177e51bd7e1. Calling drm_connector_update_edid_property() in amdgpu_connector_free_edid() causes a noticeable pause in the system every 10 seconds on polled outputs so revert this part of the change. 
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2257 Cc: Claudio Suarez Acked-by: Luben Tuikov Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index b352c4eb5bbd..aabfe5705bb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -328,7 +328,6 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector) kfree(amdgpu_connector->edid); amdgpu_connector->edid = NULL; - drm_connector_update_edid_property(connector, NULL); } static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) From 01d7c41eac9129fba80d8aed0060caab4a7dbe09 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Wed, 16 Nov 2022 22:23:54 +0800 Subject: [PATCH 1102/1477] btrfs: qgroup: fix sleep from invalid context bug in btrfs_qgroup_inherit() [ Upstream commit f7e942b5bb35d8e3af54053d19a6bf04143a3955 ] Syzkaller reported BUG as follows: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:274 Call Trace: dump_stack_lvl+0xcd/0x134 __might_resched.cold+0x222/0x26b kmem_cache_alloc+0x2e7/0x3c0 update_qgroup_limit_item+0xe1/0x390 btrfs_qgroup_inherit+0x147b/0x1ee0 create_subvol+0x4eb/0x1710 btrfs_mksubvol+0xfe5/0x13f0 __btrfs_ioctl_snap_create+0x2b0/0x430 btrfs_ioctl_snap_create_v2+0x25a/0x520 btrfs_ioctl+0x2a1c/0x5ce0 __x64_sys_ioctl+0x193/0x200 do_syscall_64+0x35/0x80 Fix this by calling qgroup_dirty() on @dstqgroup, and update limit item in btrfs_run_qgroups() later outside of the spinlock context. 
CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Qu Wenruo Signed-off-by: ChenXiaoSong Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/qgroup.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 81bbb7532eb9..74cbbb5d8897 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2913,14 +2913,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, dstgroup->rsv_rfer = inherit->lim.rsv_rfer; dstgroup->rsv_excl = inherit->lim.rsv_excl; - ret = update_qgroup_limit_item(trans, dstgroup); - if (ret) { - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - btrfs_info(fs_info, - "unable to update quota limit for %llu", - dstgroup->qgroupid); - goto unlock; - } + qgroup_dirty(fs_info, dstgroup); } if (srcid) { From b1756af172fb80a3edc143772d49e166ec691b6c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 7 Nov 2022 15:19:46 +0000 Subject: [PATCH 1103/1477] iio: health: afe4403: Fix oob read in afe4403_read_raw [ Upstream commit 58143c1ed5882c138a3cd2251a336fc8755f23d9 ] KASAN reports an out-of-bounds read as follows: BUG: KASAN: global-out-of-bounds in afe4403_read_raw+0x42e/0x4c0 Read of size 4 at addr ffffffffc02ac638 by task cat/279 Call Trace: afe4403_read_raw iio_read_channel_info dev_attr_show The buggy address belongs to the variable: afe4403_channel_leds+0x18/0xffffffffffffe9e0 This issue can be reproduced by a single command: $ cat /sys/bus/spi/devices/spi0.0/iio\:device0/in_intensity6_raw The array size of afe4403_channel_leds is less than the number of channels, so accessing it with chan->address causes an OOB read in afe4403_read_raw. Fix it by moving each array access to the point where the value is actually used.
Fixes: b36e8257641a ("iio: health/afe440x: Use regmap fields") Signed-off-by: Wei Yongjun Acked-by: Andrew Davis Link: https://lore.kernel.org/r/20221107151946.89260-1-weiyongjun@huaweicloud.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/health/afe4403.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 38734e4ce360..82d01ac36128 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -245,14 +245,14 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4403_data *afe = iio_priv(indio_dev); - unsigned int reg = afe4403_channel_values[chan->address]; - unsigned int field = afe4403_channel_leds[chan->address]; + unsigned int reg, field; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: + reg = afe4403_channel_values[chan->address]; ret = afe4403_read(afe, reg, val); if (ret) return ret; @@ -262,6 +262,7 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + field = afe4403_channel_leds[chan->address]; ret = regmap_field_read(afe->fields[field], val); if (ret) return ret; From 5eb114f55b37dbc0487aa9c1913b81bb7837f1c4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 7 Nov 2022 15:20:10 +0000 Subject: [PATCH 1104/1477] iio: health: afe4404: Fix oob read in afe4404_[read|write]_raw [ Upstream commit fc92d9e3de0b2d30a3ccc08048a5fad533e4672b ] KASAN reports an out-of-bounds read as follows: BUG: KASAN: global-out-of-bounds in afe4404_read_raw+0x2ce/0x380 Read of size 4 at addr ffffffffc00e4658 by task cat/278 Call Trace: afe4404_read_raw iio_read_channel_info dev_attr_show The buggy address belongs to the variable: afe4404_channel_leds+0x18/0xffffffffffffe9c0 This issue can be reproduced by a single command: $ cat /sys/bus/i2c/devices/0-0058/iio\:device0/in_intensity6_raw The array size
of afe4404_channel_leds and afe4404_channel_offdacs are less than channels, so access with chan->address cause OOB read in afe4404_[read|write]_raw. Fix it by moving access before use them. Fixes: b36e8257641a ("iio: health/afe440x: Use regmap fields") Signed-off-by: Wei Yongjun Acked-by: Andrew Davis Link: https://lore.kernel.org/r/20221107152010.95937-1-weiyongjun@huaweicloud.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/health/afe4404.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c index 61fe4932d81d..0eaa34da59a8 100644 --- a/drivers/iio/health/afe4404.c +++ b/drivers/iio/health/afe4404.c @@ -250,20 +250,20 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4404_data *afe = iio_priv(indio_dev); - unsigned int value_reg = afe4404_channel_values[chan->address]; - unsigned int led_field = afe4404_channel_leds[chan->address]; - unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; + unsigned int value_reg, led_field, offdac_field; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: + value_reg = afe4404_channel_values[chan->address]; ret = regmap_read(afe->regmap, value_reg, val); if (ret) return ret; return IIO_VAL_INT; case IIO_CHAN_INFO_OFFSET: + offdac_field = afe4404_channel_offdacs[chan->address]; ret = regmap_field_read(afe->fields[offdac_field], val); if (ret) return ret; @@ -273,6 +273,7 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + led_field = afe4404_channel_leds[chan->address]; ret = regmap_field_read(afe->fields[led_field], val); if (ret) return ret; @@ -295,19 +296,20 @@ static int afe4404_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct afe4404_data *afe = iio_priv(indio_dev); - unsigned int led_field = 
afe4404_channel_leds[chan->address]; - unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; + unsigned int led_field, offdac_field; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_OFFSET: + offdac_field = afe4404_channel_offdacs[chan->address]; return regmap_field_write(afe->fields[offdac_field], val); } break; case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + led_field = afe4404_channel_leds[chan->address]; return regmap_field_write(afe->fields[led_field], val); } break; From d48f6a5784054ffae7d9ef3646419222fc64ab39 Mon Sep 17 00:00:00 2001 From: Paul Gazzillo Date: Thu, 10 Nov 2022 16:47:29 -0500 Subject: [PATCH 1105/1477] iio: light: rpr0521: add missing Kconfig dependencies [ Upstream commit 6ac12303572ef9ace5603c2c07f5f1b00a33f580 ] Fix an implicit declaration of function error for rpr0521 under some configs When CONFIG_RPR0521 is enabled without CONFIG_IIO_TRIGGERED_BUFFER, the build results in "implicit declaration of function" errors, e.g., drivers/iio/light/rpr0521.c:434:3: error: implicit declaration of function 'iio_trigger_poll_chained' [-Werror=implicit-function-declaration] 434 | iio_trigger_poll_chained(data->drdy_trigger0); | ^~~~~~~~~~~~~~~~~~~~~~~~ This fix adds select dependencies to RPR0521's configuration declaration. 
Fixes: e12ffd241c00 ("iio: light: rpr0521 triggered buffer") Signed-off-by: Paul Gazzillo Link: https://bugzilla.kernel.org/show_bug.cgi?id=216678 Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221110214729.ls5ixav5kxpeftk7@device Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/light/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 917f9becf9c7..dd52eff9ba2a 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -294,6 +294,8 @@ config RPR0521 tristate "ROHM RPR0521 ALS and proximity sensor driver" depends on I2C select REGMAP_I2C + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say Y here if you want to build support for ROHM's RPR0521 ambient light and proximity sensor device. From 353c3aaaf3c45227695a016c9aaafe6b43315b56 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 14 Nov 2022 17:57:33 +0800 Subject: [PATCH 1106/1477] bpf, perf: Use subprog name when reporting subprog ksymbol [ Upstream commit 47df8a2f78bc34ff170d147d05b121f84e252b85 ] Since commit bfea9a8574f3 ("bpf: Add name to struct bpf_ksym"), when reporting subprog ksymbol to perf, prog name instead of subprog name is used. The backtrace of bpf program with subprogs will be incorrect as shown below: ffffffffc02deace bpf_prog_e44a3057dcb151f8_overwrite+0x66 ffffffffc02de9f7 bpf_prog_e44a3057dcb151f8_overwrite+0x9f ffffffffa71d8d4e trace_call_bpf+0xce ffffffffa71c2938 perf_call_bpf_enter.isra.0+0x48 overwrite is the entry program and it invokes the overwrite_htab subprog through bpf_loop, but in above backtrace, overwrite program just jumps inside itself. Fixing it by using subprog name when reporting subprog ksymbol. 
After the fix, the output of perf script will be correct as shown below: ffffffffc031aad2 bpf_prog_37c0bec7d7c764a4_overwrite_htab+0x66 ffffffffc031a9e7 bpf_prog_c7eb827ef4f23e71_overwrite+0x9f ffffffffa3dd8d4e trace_call_bpf+0xce ffffffffa3dc2938 perf_call_bpf_enter.isra.0+0x48 Fixes: bfea9a8574f3 ("bpf: Add name to struct bpf_ksym") Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20221114095733.158588-1-houtao@huaweicloud.com Signed-off-by: Sasha Levin --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 0e01216f4e5a..e9b354d521a3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8740,7 +8740,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, PERF_RECORD_KSYMBOL_TYPE_BPF, (u64)(unsigned long)subprog->bpf_func, subprog->jited_len, unregister, - prog->aux->ksym.name); + subprog->aux->ksym.name); } } } From 592724b14da7d17d09d9cf0358c3574b4903c10e Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Tue, 27 Sep 2022 13:22:11 +0530 Subject: [PATCH 1107/1477] scripts/faddr2line: Fix regression in name resolution on ppc64le [ Upstream commit 2d77de1581bb5b470486edaf17a7d70151131afd ] Commit 1d1a0e7c5100 ("scripts/faddr2line: Fix overlapping text section failures") can cause faddr2line to fail on ppc64le on some distributions, while it works fine on other distributions. The failure can be attributed to differences in the readelf output. $ ./scripts/faddr2line vmlinux find_busiest_group+0x00 no match for find_busiest_group+0x00 On ppc64le, readelf adds the localentry tag before the symbol name on some distributions, and adds the localentry tag after the symbol name on other distributions. This problem has been discussed previously: https://lore.kernel.org/bpf/20191211160133.GB4580@calabresa/ This problem can be overcome by filtering out the localentry tags in the readelf output. 
Similar fixes are already present in the kernel by way of the following commits: 1fd6cee127e2 ("libbpf: Fix VERSIONED_SYM_COUNT number parsing") aa915931ac3e ("libbpf: Fix readelf output parsing for Fedora") [jpoimboe: rework commit log] Fixes: 1d1a0e7c5100 ("scripts/faddr2line: Fix overlapping text section failures") Signed-off-by: Srikar Dronamraju Acked-by: Naveen N. Rao Reviewed-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/r/20220927075211.897152-1-srikar@linux.vnet.ibm.com Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra Signed-off-by: Sasha Levin --- scripts/faddr2line | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 57099687e5e1..9e730b805e87 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -73,7 +73,8 @@ command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed" find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(${READELF} --symbols --wide $objfile | ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') + local start_kernel_addr=$(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | + ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && return local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) @@ -177,7 +178,7 @@ __faddr2line() { found=2 break fi - done < <(${READELF} --symbols --wide $objfile | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) + done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) if [[ $found = 0 ]]; then warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size" @@ -258,7 +259,7 @@ __faddr2line() { DONE=1 - done < <(${READELF} --symbols --wide $objfile | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') + done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') } [[ $# -lt 2 
]] && usage From cc140c729c6832eef8eed07cf09b0aa634857603 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Mon, 14 Nov 2022 19:59:22 +0100 Subject: [PATCH 1108/1477] ARM: at91: rm9200: fix usb device clock id [ Upstream commit 57976762428675f259339385d3324d28ee53ec02 ] Referring to the datasheet the index 2 is the MCKUDP. When enabled, it "Enables the automatic disable of the Master Clock of the USB Device Port when a suspend condition occurs". We fix the index to the real UDP id which "Enables the 48 MHz clock of the USB Device Port". Cc: nicolas.ferre@microchip.com Cc: ludovic.desroches@microchip.com Cc: alexandre.belloni@bootlin.com Cc: mturquette@baylibre.com Cc: sboyd@kernel.org Cc: claudiu.beznea@microchip.com Cc: linux-clk@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: kernel@pengutronix.de Fixes: 02ff48e4d7f7 ("clk: at91: add at91rm9200 pmc driver") Fixes: 0e0e528d8260 ("ARM: dts: at91: rm9200: switch to new clock bindings") Reviewed-by: Claudiu Beznea Signed-off-by: Michael Grzeschik Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221114185923.1023249-2-m.grzeschik@pengutronix.de Signed-off-by: Sasha Levin --- arch/arm/boot/dts/at91rm9200.dtsi | 2 +- drivers/clk/at91/at91rm9200.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi index d1181ead18e5..21344fbc89e5 100644 --- a/arch/arm/boot/dts/at91rm9200.dtsi +++ b/arch/arm/boot/dts/at91rm9200.dtsi @@ -660,7 +660,7 @@ compatible = "atmel,at91rm9200-udc"; reg = <0xfffb0000 0x4000>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>; + clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>; clock-names = "pclk", "hclk"; status = "disabled"; }; diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c index 2c3d8e6ca63c..7cc20c0f8865 100644 --- a/drivers/clk/at91/at91rm9200.c +++ 
b/drivers/clk/at91/at91rm9200.c @@ -38,7 +38,7 @@ static const struct clk_pll_characteristics rm9200_pll_characteristics = { }; static const struct sck at91rm9200_systemck[] = { - { .n = "udpck", .p = "usbck", .id = 2 }, + { .n = "udpck", .p = "usbck", .id = 1 }, { .n = "uhpck", .p = "usbck", .id = 4 }, { .n = "pck0", .p = "prog0", .id = 8 }, { .n = "pck1", .p = "prog1", .id = 9 }, From 8a549ab6724520aa3c07f47e0eba820293551490 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 16 Nov 2022 15:23:49 +0800 Subject: [PATCH 1109/1477] libbpf: Handle size overflow for ringbuf mmap [ Upstream commit 927cbb478adf917e0a142b94baa37f06279cc466 ] The maximum size of ringbuf is 2GB on x86-64 host, so 2 * max_entries will overflow u32 when mapping producer page and data pages. Only casting max_entries to size_t is not enough, because for 32-bits application on 64-bits kernel the size of read-only mmap region also could overflow size_t. So fixing it by casting the size of read-only mmap region into a __u64 and checking whether or not there will be overflow during mmap. 
Fixes: bf99c936f947 ("libbpf: Add BPF ring buffer support") Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221116072351.1168938-3-houtao@huaweicloud.com Signed-off-by: Sasha Levin --- tools/lib/bpf/ringbuf.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 86c31c787fb9..5e242be45206 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -59,6 +59,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, __u32 len = sizeof(info); struct epoll_event *e; struct ring *r; + __u64 mmap_sz; void *tmp; int err; @@ -97,8 +98,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, r->mask = info.max_entries - 1; /* Map writable consumer page */ - tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, - map_fd, 0); + tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", @@ -111,8 +111,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, * data size to allow simple reading of samples that wrap around the * end of a ring buffer. See kernel implementation for details. 
* */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, - MAP_SHARED, map_fd, rb->page_size); + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); + return -E2BIG; + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; ringbuf_unmap_ring(rb, r); From 984fcd3ec1aafe3cb5a50b3ca2f9b7c97fdc4e1e Mon Sep 17 00:00:00 2001 From: Derek Nguyen Date: Thu, 10 Nov 2022 13:21:08 -0600 Subject: [PATCH 1110/1477] hwmon: (ltc2947) fix temperature scaling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 07e06193ead86d4812f431b4d87bbd4161222e3f ] The LTC2947 datasheet (Rev. B) calls out in the section "Register Description: Non-Accumulated Result Registers" (pg. 30) that "To calculate temperature, multiply the TEMP register value by 0.204°C and add 5.5°C". Fix to add 5.5C and not 0.55C. 
Fixes: 9f90fd652bed ("hwmon: Add support for ltc2947") Signed-off-by: Derek Nguyen Signed-off-by: Brandon Maier Link: https://lore.kernel.org/r/20221110192108.20624-1-brandon.maier@collins.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ltc2947-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c index 5423466de697..e918490f3ff7 100644 --- a/drivers/hwmon/ltc2947-core.c +++ b/drivers/hwmon/ltc2947-core.c @@ -396,7 +396,7 @@ static int ltc2947_read_temp(struct device *dev, const u32 attr, long *val, return ret; /* in milidegrees celcius, temp is given by: */ - *val = (__val * 204) + 550; + *val = (__val * 204) + 5500; return 0; } From dddfc03f044b1acb10477dad42d51522368b1fbc Mon Sep 17 00:00:00 2001 From: Ninad Malwade Date: Tue, 8 Nov 2022 12:45:08 +0800 Subject: [PATCH 1111/1477] hwmon: (ina3221) Fix shunt sum critical calculation [ Upstream commit b8d27d2ce8dfc207e4b67b929a86f2be76fbc6ef ] The shunt sum critical limit register value should be left shifted by one bit as its LSB-0 is a reserved bit. 
Fixes: 2057bdfb7184 ("hwmon: (ina3221) Add summation feature support") Signed-off-by: Ninad Malwade Reviewed-by: Thierry Reding Link: https://lore.kernel.org/r/20221108044508.23463-1-nmalwade@nvidia.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ina3221.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index ad11cbddc3a7..d3c98115042b 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -230,7 +230,7 @@ static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg, * Shunt Voltage Sum register has 14-bit value with 1-bit shift * Other Shunt Voltage registers have 12 bits with 3-bit shift */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) *val = sign_extend32(regval >> 1, 14); else *val = sign_extend32(regval >> 3, 12); @@ -465,7 +465,7 @@ static int ina3221_write_curr(struct device *dev, u32 attr, * SHUNT_SUM: (1 / 40uV) << 1 = 1 / 20uV * SHUNT[1-3]: (1 / 40uV) << 3 = 1 / 5uV */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) regval = DIV_ROUND_CLOSEST(voltage_uv, 20) & 0xfffe; else regval = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8; From 7649bba2633dd1dafb0dc8cd37a46dc86ecee4a2 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 12 Nov 2022 20:56:06 +0800 Subject: [PATCH 1112/1477] hwmon: (i5500_temp) fix missing pci_disable_device() [ Upstream commit 3b7f98f237528c496ea0b689bace0e35eec3e060 ] pci_disable_device() needs to be called when the module exits; switch to pcim_enable_device() so that pci_disable_device() will be called in pcim_release().
Fixes: ada072816be1 ("hwmon: (i5500_temp) New driver for the Intel 5500/5520/X58 chipsets") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221112125606.3751430-1-yangyingliang@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/i5500_temp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c index 360f5aee1394..d4be03f43fb4 100644 --- a/drivers/hwmon/i5500_temp.c +++ b/drivers/hwmon/i5500_temp.c @@ -108,7 +108,7 @@ static int i5500_temp_probe(struct pci_dev *pdev, u32 tstimer; s8 tsfsc; - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Failed to enable device\n"); return err; From 90907cd4d11351ff76c9a447bcb5db0e264c47cd Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 17 Nov 2022 11:44:23 +0800 Subject: [PATCH 1113/1477] hwmon: (ibmpex) Fix possible UAF when ibmpex_register_bmc() fails [ Upstream commit e2a87785aab0dac190ac89be6a9ba955e2c634f2 ] Smatch reports a warning as follows: drivers/hwmon/ibmpex.c:509 ibmpex_register_bmc() warn: '&data->list' not removed from list If ibmpex_find_sensors() fails in ibmpex_register_bmc(), data will be freed, but data->list will not be removed from driver_data.bmc_data, then list traversal may cause UAF. Fix by removing it from driver_data.bmc_data before free().
Fixes: 57c7c3a0fdea ("hwmon: IBM power meter driver") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221117034423.2935739-1-cuigaosheng1@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ibmpex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c index b2ab83c9fd9a..fe90f0536d76 100644 --- a/drivers/hwmon/ibmpex.c +++ b/drivers/hwmon/ibmpex.c @@ -502,6 +502,7 @@ static void ibmpex_register_bmc(int iface, struct device *dev) return; out_register: + list_del(&data->list); hwmon_device_unregister(data->hwmon_dev); out_user: ipmi_destroy_user(data->user); From be006212bd530b26f4d9f58de140e3973a225044 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Mon, 14 Nov 2022 08:47:19 -0500 Subject: [PATCH 1114/1477] bpf: Do not copy spin lock field from user in bpf_selem_alloc [ Upstream commit 836e49e103dfeeff670c934b7d563cbd982fce87 ] bpf_selem_alloc function is used by inode_storage, sk_storage and task_storage maps to set map value, for these map types, there may be a spin lock in the map value, so if we use memcpy to copy the whole map value from user, the spin lock field may be initialized incorrectly. Since the spin lock field is zeroed by kzalloc, call copy_map_value instead of memcpy to skip copying the spin lock field to fix it. 
Fixes: 6ac99e8f23d4 ("bpf: Introduce bpf sk local storage") Signed-off-by: Xu Kuohai Link: https://lore.kernel.org/r/20221114134720.1057939-2-xukuohai@huawei.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/bpf_local_storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 5d3a7af9ba9b..8aaaaef99f09 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -70,7 +70,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN); if (selem) { if (value) - memcpy(SDATA(selem)->data, value, smap->map.value_size); + copy_map_value(&smap->map, SDATA(selem)->data, value); return selem; } From 8f7047f418102ff45494f9fe388315ec0fa505e5 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 21 Nov 2022 10:32:09 +0800 Subject: [PATCH 1115/1477] of: property: decrement node refcount in of_fwnode_get_reference_args() [ Upstream commit 60d865bd5a9b15a3961eb1c08bd4155682a3c81e ] In of_fwnode_get_reference_args(), the refcount of of_args.np has been incremented in the case of successful return from of_parse_phandle_with_args() or of_parse_phandle_with_fixed_args(). Decrement the refcount if of_args is not returned to the caller of of_fwnode_get_reference_args(). 
Fixes: 3e3119d3088f ("device property: Introduce fwnode_property_get_reference_args") Signed-off-by: Yang Yingliang Reviewed-by: Sakari Ailus Reviewed-by: Frank Rowand Link: https://lore.kernel.org/r/20221121023209.3909759-1-yangyingliang@huawei.com Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/property.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index 1d7d24e7094b..8f998351bf4f 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -956,8 +956,10 @@ of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, nargs, index, &of_args); if (ret < 0) return ret; - if (!args) + if (!args) { + of_node_put(of_args.np); return 0; + } args->nargs = of_args.args_count; args->fwnode = of_fwnode_handle(of_args.np); From f166c62cad798c53300b4b327e44300c73ec492d Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Mon, 14 Nov 2022 10:57:58 +0800 Subject: [PATCH 1116/1477] ixgbevf: Fix resource leak in ixgbevf_init_module() [ Upstream commit 8cfa238a48f34038464b99d0b4825238c2687181 ] ixgbevf_init_module() won't destroy the workqueue created by create_singlethread_workqueue() when pci_register_driver() failed. Add destroy_workqueue() in fail path to prevent the resource leak. 
Similar to the handling of u132_hcd_init in commit f276e002793c ("usb: u132-hcd: fix resource leak") Fixes: 40a13e2493c9 ("ixgbevf: Use a private workqueue to avoid certain possible hangs") Signed-off-by: Shang XiaoJing Reviewed-by: Saeed Mahameed Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 2d6ac61d7a3e..4510a84514fa 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4878,6 +4878,8 @@ static struct pci_driver ixgbevf_driver = { **/ static int __init ixgbevf_init_module(void) { + int err; + pr_info("%s\n", ixgbevf_driver_string); pr_info("%s\n", ixgbevf_copyright); ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); @@ -4886,7 +4888,13 @@ static int __init ixgbevf_init_module(void) return -ENOMEM; } - return pci_register_driver(&ixgbevf_driver); + err = pci_register_driver(&ixgbevf_driver); + if (err) { + destroy_workqueue(ixgbevf_wq); + return err; + } + + return 0; } module_init(ixgbevf_init_module); From dd425cec79baeaad2f93b61647bb41b246856914 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Wed, 16 Nov 2022 09:27:25 +0800 Subject: [PATCH 1117/1477] i40e: Fix error handling in i40e_init_module() [ Upstream commit 479dd06149425b9e00477f52200872587af76a48 ] i40e_init_module() won't free the debugfs directory created by i40e_dbg_init() when pci_register_driver() failed. Add fail path to call i40e_dbg_exit() to remove the debugfs entries to prevent the bug. i40e: Intel(R) Ethernet Connection XL710 Network Driver i40e: Copyright (c) 2013 - 2019 Intel Corporation. debugfs: Directory 'i40e' with parent '/' already present! 
Fixes: 41c445ff0f48 ("i40e: main driver core") Signed-off-by: Shang XiaoJing Reviewed-by: Leon Romanovsky Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ea6a984c6d12..d7ddf9239e51 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -15972,6 +15972,8 @@ static struct pci_driver i40e_driver = { **/ static int __init i40e_init_module(void) { + int err; + pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); @@ -15989,7 +15991,14 @@ static int __init i40e_init_module(void) } i40e_dbg_init(); - return pci_register_driver(&i40e_driver); + err = pci_register_driver(&i40e_driver); + if (err) { + destroy_workqueue(i40e_wq); + i40e_dbg_exit(); + return err; + } + + return 0; } module_init(i40e_init_module); From fd4960ea53625105fb8616096b931a977a63a8e5 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 14 Nov 2022 08:26:39 +0000 Subject: [PATCH 1118/1477] fm10k: Fix error handling in fm10k_init_module() [ Upstream commit 771a794c0a3c3e7f0d86cc34be4f9537e8c0a20c ] A problem about modprobe fm10k failed is triggered with the following log given: Intel(R) Ethernet Switch Host Interface Driver Copyright(c) 2013 - 2019 Intel Corporation. debugfs: Directory 'fm10k' with parent '/' already present! The reason is that fm10k_init_module() returns fm10k_register_pci_driver() directly without checking its return value, if fm10k_register_pci_driver() failed, it returns without removing debugfs and destroy workqueue, resulting the debugfs of fm10k can never be created later and leaks the workqueue. 
fm10k_init_module() alloc_workqueue() fm10k_dbg_init() # create debugfs fm10k_register_pci_driver() pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without remove debugfs and destroy workqueue Fix by removing the debugfs entries and destroying the workqueue when fm10k_register_pci_driver() returns an error. Fixes: 7461fd913afe ("fm10k: Add support for debugfs") Fixes: b382bb1b3e2d ("fm10k: use separate workqueue for fm10k driver") Signed-off-by: Yuan Can Reviewed-by: Jacob Keller Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 99b8252eb969..a388a0fcbeed 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -32,6 +32,8 @@ struct workqueue_struct *fm10k_workqueue; **/ static int __init fm10k_init_module(void) { + int ret; + pr_info("%s\n", fm10k_driver_string); pr_info("%s\n", fm10k_copyright); @@ -43,7 +45,13 @@ static int __init fm10k_init_module(void) fm10k_dbg_init(); - return fm10k_register_pci_driver(); + ret = fm10k_register_pci_driver(); + if (ret) { + fm10k_dbg_exit(); + destroy_workqueue(fm10k_workqueue); + } + + return ret; } module_init(fm10k_init_module); From d389a4c6987708533000130592ddf33bc48b8b0d Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Mon, 10 Jan 2022 10:46:56 +0000 Subject: [PATCH 1119/1477] iavf: remove redundant ret variable [ Upstream commit c3fec56e12678c3ad68084048a73818a7968d6b8 ] Return value directly instead of taking this in another redundant variable.
Reported-by: Zeal Robot Signed-off-by: Minghao Chi Signed-off-by: CGEL ZTE Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Stable-dep-of: 227d8d2f7f22 ("iavf: Fix error handling in iavf_init_module()") Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index a9cea7ccdd86..af515c3ccd5c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1318,7 +1318,6 @@ static void iavf_fill_rss_lut(struct iavf_adapter *adapter) static int iavf_init_rss(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; - int ret; if (!RSS_PF(adapter)) { /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ @@ -1334,9 +1333,8 @@ static int iavf_init_rss(struct iavf_adapter *adapter) iavf_fill_rss_lut(adapter); netdev_rss_key_fill((void *)adapter->rss_key, adapter->rss_key_size); - ret = iavf_config_rss(adapter); - return ret; + return iavf_config_rss(adapter); } /** @@ -4028,8 +4026,6 @@ static struct pci_driver iavf_driver = { **/ static int __init iavf_init_module(void) { - int ret; - pr_info("iavf: %s\n", iavf_driver_string); pr_info("%s\n", iavf_copyright); @@ -4040,8 +4036,7 @@ static int __init iavf_init_module(void) pr_err("%s: Failed to create workqueue\n", iavf_driver_name); return -ENOMEM; } - ret = pci_register_driver(&iavf_driver); - return ret; + return pci_register_driver(&iavf_driver); } module_init(iavf_init_module); From 971c55f0763b480e63ceb7a22beb19be2509e5ed Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 14 Nov 2022 08:26:40 +0000 Subject: [PATCH 1120/1477] iavf: Fix error handling in iavf_init_module() [ Upstream commit 227d8d2f7f2278b8468c5531b0cd0f2a905b4486 ] The iavf_init_module() won't destroy workqueue when pci_register_driver() failed. 
Call destroy_workqueue() when pci_register_driver() failed to prevent the resource leak. Similar to the handling of u132_hcd_init in commit f276e002793c ("usb: u132-hcd: fix resource leak") Fixes: 2803b16c10ea ("i40e/i40evf: Use private workqueue") Signed-off-by: Yuan Can Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index af515c3ccd5c..ae96b552a3bb 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4026,6 +4026,8 @@ static struct pci_driver iavf_driver = { **/ static int __init iavf_init_module(void) { + int ret; + pr_info("iavf: %s\n", iavf_driver_string); pr_info("%s\n", iavf_copyright); @@ -4036,7 +4038,12 @@ static int __init iavf_init_module(void) pr_err("%s: Failed to create workqueue\n", iavf_driver_name); return -ENOMEM; } - return pci_register_driver(&iavf_driver); + + ret = pci_register_driver(&iavf_driver); + if (ret) + destroy_workqueue(iavf_wq); + + return ret; } module_init(iavf_init_module); From 086f656e447b082c055cab9b059a71cd409e5c60 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 28 Jan 2021 22:07:36 +0100 Subject: [PATCH 1121/1477] e100: switch from 'pci_' to 'dma_' API [ Upstream commit 4140ff1ba06d3fc16afd518736940ab742886317 ] The wrappers in include/linux/pci-dma-compat.h should go away. The patch has been generated with the coccinelle script below and has been hand modified to replace GFP_ with a correct flag. It has been compile tested. When memory is allocated in 'e100_alloc()', GFP_KERNEL can be used because it is only called from the probe function and no lock is acquired. 
@@ @@ - PCI_DMA_BIDIRECTIONAL + DMA_BIDIRECTIONAL @@ @@ - PCI_DMA_TODEVICE + DMA_TO_DEVICE @@ @@ - PCI_DMA_FROMDEVICE + DMA_FROM_DEVICE @@ @@ - PCI_DMA_NONE + DMA_NONE @@ expression e1, e2, e3; @@ - pci_alloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3; @@ - pci_zalloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3, e4; @@ - pci_free_consistent(e1, e2, e3, e4) + dma_free_coherent(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_single(e1, e2, e3, e4) + dma_map_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_single(e1, e2, e3, e4) + dma_unmap_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4, e5; @@ - pci_map_page(e1, e2, e3, e4, e5) + dma_map_page(&e1->dev, e2, e3, e4, e5) @@ expression e1, e2, e3, e4; @@ - pci_unmap_page(e1, e2, e3, e4) + dma_unmap_page(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_sg(e1, e2, e3, e4) + dma_map_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_sg(e1, e2, e3, e4) + dma_unmap_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_cpu(e1, e2, e3, e4) + dma_sync_single_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_device(e1, e2, e3, e4) + dma_sync_single_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_cpu(e1, e2, e3, e4) + dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_device(e1, e2, e3, e4) + dma_sync_sg_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2; @@ - pci_dma_mapping_error(e1, e2) + dma_mapping_error(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_dma_mask(e1, e2) + dma_set_mask(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_consistent_dma_mask(e1, e2) + dma_set_coherent_mask(&e1->dev, e2) Signed-off-by: Christophe JAILLET Tested-by: Aaron Brown Link: 
https://lore.kernel.org/r/20210128210736.749724-1-christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski Stable-dep-of: 45605c75c52c ("e100: Fix possible use after free in e100_xmit_prepare") Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e100.c | 92 ++++++++++++++++--------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 9295a9a1efc7..7ccf890ee735 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1739,10 +1739,10 @@ static int e100_xmit_prepare(struct nic *nic, struct cb *cb, dma_addr_t dma_addr; cb->command = nic->tx_command; - dma_addr = pci_map_single(nic->pdev, - skb->data, skb->len, PCI_DMA_TODEVICE); + dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len, + DMA_TO_DEVICE); /* If we can't map the skb, have the upper layer try later */ - if (pci_dma_mapping_error(nic->pdev, dma_addr)) { + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { dev_kfree_skb_any(skb); skb = NULL; return -ENOMEM; @@ -1828,10 +1828,10 @@ static int e100_tx_clean(struct nic *nic) dev->stats.tx_packets++; dev->stats.tx_bytes += cb->skb->len; - pci_unmap_single(nic->pdev, - le32_to_cpu(cb->u.tcb.tbd.buf_addr), - le16_to_cpu(cb->u.tcb.tbd.size), - PCI_DMA_TODEVICE); + dma_unmap_single(&nic->pdev->dev, + le32_to_cpu(cb->u.tcb.tbd.buf_addr), + le16_to_cpu(cb->u.tcb.tbd.size), + DMA_TO_DEVICE); dev_kfree_skb_any(cb->skb); cb->skb = NULL; tx_cleaned = 1; @@ -1855,10 +1855,10 @@ static void e100_clean_cbs(struct nic *nic) while (nic->cbs_avail != nic->params.cbs.count) { struct cb *cb = nic->cb_to_clean; if (cb->skb) { - pci_unmap_single(nic->pdev, - le32_to_cpu(cb->u.tcb.tbd.buf_addr), - le16_to_cpu(cb->u.tcb.tbd.size), - PCI_DMA_TODEVICE); + dma_unmap_single(&nic->pdev->dev, + le32_to_cpu(cb->u.tcb.tbd.buf_addr), + le16_to_cpu(cb->u.tcb.tbd.size), + DMA_TO_DEVICE); dev_kfree_skb(cb->skb); } nic->cb_to_clean = 
nic->cb_to_clean->next; @@ -1925,10 +1925,10 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx) /* Init, and map the RFD. */ skb_copy_to_linear_data(rx->skb, &nic->blank_rfd, sizeof(struct rfd)); - rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data, - RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); + rx->dma_addr = dma_map_single(&nic->pdev->dev, rx->skb->data, + RFD_BUF_LEN, DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(nic->pdev, rx->dma_addr)) { + if (dma_mapping_error(&nic->pdev->dev, rx->dma_addr)) { dev_kfree_skb_any(rx->skb); rx->skb = NULL; rx->dma_addr = 0; @@ -1941,8 +1941,10 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx) if (rx->prev->skb) { struct rfd *prev_rfd = (struct rfd *)rx->prev->skb->data; put_unaligned_le32(rx->dma_addr, &prev_rfd->link); - pci_dma_sync_single_for_device(nic->pdev, rx->prev->dma_addr, - sizeof(struct rfd), PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_device(&nic->pdev->dev, + rx->prev->dma_addr, + sizeof(struct rfd), + DMA_BIDIRECTIONAL); } return 0; @@ -1961,8 +1963,8 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, return -EAGAIN; /* Need to sync before taking a peek at cb_complete bit */ - pci_dma_sync_single_for_cpu(nic->pdev, rx->dma_addr, - sizeof(struct rfd), PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_cpu(&nic->pdev->dev, rx->dma_addr, + sizeof(struct rfd), DMA_BIDIRECTIONAL); rfd_status = le16_to_cpu(rfd->status); netif_printk(nic, rx_status, KERN_DEBUG, nic->netdev, @@ -1981,9 +1983,9 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, if (ioread8(&nic->csr->scb.status) & rus_no_res) nic->ru_running = RU_SUSPENDED; - pci_dma_sync_single_for_device(nic->pdev, rx->dma_addr, - sizeof(struct rfd), - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&nic->pdev->dev, rx->dma_addr, + sizeof(struct rfd), + DMA_FROM_DEVICE); return -ENODATA; } @@ -1995,8 +1997,8 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, actual_size = RFD_BUF_LEN - sizeof(struct rfd); /* 
Get data */ - pci_unmap_single(nic->pdev, rx->dma_addr, - RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); + dma_unmap_single(&nic->pdev->dev, rx->dma_addr, RFD_BUF_LEN, + DMA_BIDIRECTIONAL); /* If this buffer has the el bit, but we think the receiver * is still running, check to see if it really stopped while @@ -2097,22 +2099,25 @@ static void e100_rx_clean(struct nic *nic, unsigned int *work_done, (struct rfd *)new_before_last_rx->skb->data; new_before_last_rfd->size = 0; new_before_last_rfd->command |= cpu_to_le16(cb_el); - pci_dma_sync_single_for_device(nic->pdev, - new_before_last_rx->dma_addr, sizeof(struct rfd), - PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_device(&nic->pdev->dev, + new_before_last_rx->dma_addr, + sizeof(struct rfd), + DMA_BIDIRECTIONAL); /* Now that we have a new stopping point, we can clear the old * stopping point. We must sync twice to get the proper * ordering on the hardware side of things. */ old_before_last_rfd->command &= ~cpu_to_le16(cb_el); - pci_dma_sync_single_for_device(nic->pdev, - old_before_last_rx->dma_addr, sizeof(struct rfd), - PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_device(&nic->pdev->dev, + old_before_last_rx->dma_addr, + sizeof(struct rfd), + DMA_BIDIRECTIONAL); old_before_last_rfd->size = cpu_to_le16(VLAN_ETH_FRAME_LEN + ETH_FCS_LEN); - pci_dma_sync_single_for_device(nic->pdev, - old_before_last_rx->dma_addr, sizeof(struct rfd), - PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_device(&nic->pdev->dev, + old_before_last_rx->dma_addr, + sizeof(struct rfd), + DMA_BIDIRECTIONAL); } if (restart_required) { @@ -2134,8 +2139,9 @@ static void e100_rx_clean_list(struct nic *nic) if (nic->rxs) { for (rx = nic->rxs, i = 0; i < count; rx++, i++) { if (rx->skb) { - pci_unmap_single(nic->pdev, rx->dma_addr, - RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); + dma_unmap_single(&nic->pdev->dev, + rx->dma_addr, RFD_BUF_LEN, + DMA_BIDIRECTIONAL); dev_kfree_skb(rx->skb); } } @@ -2177,8 +2183,8 @@ static int e100_rx_alloc_list(struct nic *nic) 
before_last = (struct rfd *)rx->skb->data; before_last->command |= cpu_to_le16(cb_el); before_last->size = 0; - pci_dma_sync_single_for_device(nic->pdev, rx->dma_addr, - sizeof(struct rfd), PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_device(&nic->pdev->dev, rx->dma_addr, + sizeof(struct rfd), DMA_BIDIRECTIONAL); nic->rx_to_use = nic->rx_to_clean = nic->rxs; nic->ru_running = RU_SUSPENDED; @@ -2377,8 +2383,8 @@ static int e100_loopback_test(struct nic *nic, enum loopback loopback_mode) msleep(10); - pci_dma_sync_single_for_cpu(nic->pdev, nic->rx_to_clean->dma_addr, - RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_cpu(&nic->pdev->dev, nic->rx_to_clean->dma_addr, + RFD_BUF_LEN, DMA_BIDIRECTIONAL); if (memcmp(nic->rx_to_clean->skb->data + sizeof(struct rfd), skb->data, ETH_DATA_LEN)) @@ -2759,16 +2765,16 @@ static int e100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) static int e100_alloc(struct nic *nic) { - nic->mem = pci_alloc_consistent(nic->pdev, sizeof(struct mem), - &nic->dma_addr); + nic->mem = dma_alloc_coherent(&nic->pdev->dev, sizeof(struct mem), + &nic->dma_addr, GFP_KERNEL); return nic->mem ? 
0 : -ENOMEM; } static void e100_free(struct nic *nic) { if (nic->mem) { - pci_free_consistent(nic->pdev, sizeof(struct mem), - nic->mem, nic->dma_addr); + dma_free_coherent(&nic->pdev->dev, sizeof(struct mem), + nic->mem, nic->dma_addr); nic->mem = NULL; } } @@ -2861,7 +2867,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_disable_pdev; } - if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { + if ((err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))) { netif_err(nic, probe, nic->netdev, "No usable DMA configuration, aborting\n"); goto err_out_free_res; } From b775f37d943966f6f77dca402f5a9dedce502c25 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 16 Nov 2022 01:24:07 +0800 Subject: [PATCH 1122/1477] e100: Fix possible use after free in e100_xmit_prepare [ Upstream commit 45605c75c52c7ae7bfe902214343aabcfe5ba0ff ] In e100_xmit_prepare(), if we can't map the skb, then return -ENOMEM, so e100_xmit_frame() will return NETDEV_TX_BUSY and the upper layer will resend the skb. But the skb is already freed, which will cause UAF bug when the upper layer resends the skb. Remove the harmful free. 
Fixes: 5e5d49422dfb ("e100: Release skb when DMA mapping is failed in e100_xmit_prepare") Signed-off-by: Wang Hai Reviewed-by: Alexander Duyck Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e100.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 7ccf890ee735..001850d578e8 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1742,11 +1742,8 @@ static int e100_xmit_prepare(struct nic *nic, struct cb *cb, dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len, DMA_TO_DEVICE); /* If we can't map the skb, have the upper layer try later */ - if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { - dev_kfree_skb_any(skb); - skb = NULL; + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) return -ENOMEM; - } /* * Use the last 4 bytes of the SKB payload packet as the CRC, used for From 2cb84ff34938cc3cdfe2d90e70cf8a8ced66fd1e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 21 Nov 2022 19:22:04 +0800 Subject: [PATCH 1123/1477] net/mlx5: Fix uninitialized variable bug in outlen_write() [ Upstream commit 3f5769a074c13d8f08455e40586600419e02a880 ] If sscanf() returns 0, outlen is uninitialized and used in kzalloc(), which is unexpected. We should return -EINVAL if the string is invalid.
Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: YueHaibing Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c838d8698eab..39c17e903915 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1422,8 +1422,8 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, return -EFAULT; err = sscanf(outlen_str, "%d", &outlen); - if (err < 0) - return err; + if (err != 1) + return -EINVAL; ptr = kzalloc(outlen, GFP_KERNEL); if (!ptr) From 0d2f9d95d9fbe993f3c4bafb87d59897b0325aff Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 14 Nov 2022 20:04:29 +0200 Subject: [PATCH 1124/1477] net/mlx5e: Fix use-after-free when reverting termination table [ Upstream commit 52c795af04441d76f565c4634f893e5b553df2ae ] When having multiple dests with termination tables and the second one or a later one fails, the driver reverts usage of term tables but doesn't reset the assignment in attr->dests[num_vport_dests].termtbl, which causes a use-after-free when releasing the rule. Fix by resetting the assignment of termtbl to null.
Fixes: 10caabdaad5a ("net/mlx5e: Use termination table for VLAN push actions") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 6c865cb7f445..132ea9997676 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -308,6 +308,8 @@ revert_changes: for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + attr->dests[curr_dest].termtbl = NULL; + /* search for the destination associated with the * current term table */ From e74746bf0453f3af258d03e448744664c839376b Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 11 Nov 2022 20:08:41 +0800 Subject: [PATCH 1125/1477] can: sja1000_isa: sja1000_isa_probe(): add missing free_sja1000dev() [ Upstream commit 92dfd9310a71d28cefe6a2d5174d43fab240e631 ] Add the missing free_sja1000dev() before return from sja1000_isa_probe() in the register_sja1000dev() error handling case. In addition, remove blanks before goto labels. 
Fixes: 2a6ba39ad6a2 ("can: sja1000: legacy SJA1000 ISA bus driver") Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/all/1668168521-5540-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/sja1000/sja1000_isa.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c index d513fac50718..db3e767d5320 100644 --- a/drivers/net/can/sja1000/sja1000_isa.c +++ b/drivers/net/can/sja1000/sja1000_isa.c @@ -202,22 +202,24 @@ static int sja1000_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "registering %s failed (err=%d)\n", DRV_NAME, err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "%s device registered (reg_base=0x%p, irq=%d)\n", DRV_NAME, priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_sja1000dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } From d753f554f25d110d0c8269a21d9cce02acdc08f9 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 11 Nov 2022 20:09:16 +0800 Subject: [PATCH 1126/1477] can: cc770: cc770_isa_probe(): add missing free_cc770dev() [ Upstream commit 62ec89e74099a3d6995988ed9f2f996b368417ec ] Add the missing free_cc770dev() before return from cc770_isa_probe() in the register_cc770dev() error handling case. In addition, remove blanks before goto labels. 
Fixes: 7e02e5433e00 ("can: cc770: legacy CC770 ISA bus driver") Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/all/1668168557-6024-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/cc770/cc770_isa.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c index 194c86e0f340..8f6dccd5a587 100644 --- a/drivers/net/can/cc770/cc770_isa.c +++ b/drivers/net/can/cc770/cc770_isa.c @@ -264,22 +264,24 @@ static int cc770_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "couldn't register device (err=%d)\n", err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "device registered (reg_base=0x%p, irq=%d)\n", priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_cc770dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } From fa59d49a49b06429df16f5353477d35328f02cc8 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 23 Nov 2022 18:06:42 +0800 Subject: [PATCH 1127/1477] qlcnic: fix sleep-in-atomic-context bugs caused by msleep [ Upstream commit 8dbd6e4ce1b9c527921643d9e34f188a10d4e893 ] The watchdog timer is used to monitor whether the process of transmitting data is timeout. If we use qlcnic driver, the dev_watchdog() that is the timer handler of watchdog timer will call qlcnic_tx_timeout() to process the timeout. But the qlcnic_tx_timeout() calls msleep(), as a result, the sleep-in-atomic-context bugs will happen. 
The processes are shown below: (atomic context) dev_watchdog qlcnic_tx_timeout qlcnic_83xx_idc_request_reset qlcnic_83xx_lock_driver msleep --------------------------- (atomic context) dev_watchdog qlcnic_tx_timeout qlcnic_83xx_idc_request_reset qlcnic_83xx_lock_driver qlcnic_83xx_recover_driver_lock msleep Fix by changing msleep() to mdelay(), the mdelay() is busy-waiting and the bugs could be mitigated. Fixes: 629263acaea3 ("qlcnic: 83xx CNA inter driver communication mechanism") Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index bd0607680329..2fd5c6fdb500 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2991,7 +2991,7 @@ static void qlcnic_83xx_recover_driver_lock(struct qlcnic_adapter *adapter) QLCWRX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK, val); dev_info(&adapter->pdev->dev, "%s: lock recovery initiated\n", __func__); - msleep(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); + mdelay(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); val = QLCRDX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK); id = ((val >> 2) & 0xF); if (id == adapter->portnum) { @@ -3027,7 +3027,7 @@ int qlcnic_83xx_lock_driver(struct qlcnic_adapter *adapter) if (status) break; - msleep(QLC_83XX_DRV_LOCK_WAIT_DELAY); + mdelay(QLC_83XX_DRV_LOCK_WAIT_DELAY); i++; if (i == 1) From 6922948c2ec1cc45b11acb8ec2eeb3a8353d572b Mon Sep 17 00:00:00 2001 From: Izabela Bakollari Date: Wed, 23 Nov 2022 11:10:08 +0100 Subject: [PATCH 1128/1477] aquantia: Do not purge addresses when setting the number of rings [ Upstream commit 2a83891130512dafb321418a8e7c9c09268d8c59 ] IPV6 addresses are purged when setting the number of rx/tx rings using ethtool -G. 
The function aq_set_ringparam calls dev_close, which removes the addresses. As a solution, call an internal function (aq_ndev_close). Fixes: c1af5427954b ("net: aquantia: Ethtool based ring size configuration") Signed-off-by: Izabela Bakollari Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c | 5 +++-- drivers/net/ethernet/aquantia/atlantic/aq_main.c | 4 ++-- drivers/net/ethernet/aquantia/atlantic/aq_main.h | 2 ++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index de2a9348bc3f..1d512e6a89f5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -13,6 +13,7 @@ #include "aq_ptp.h" #include "aq_filters.h" #include "aq_macsec.h" +#include "aq_main.h" #include @@ -841,7 +842,7 @@ static int aq_set_ringparam(struct net_device *ndev, if (netif_running(ndev)) { ndev_running = true; - dev_close(ndev); + aq_ndev_close(ndev); } cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min); @@ -857,7 +858,7 @@ static int aq_set_ringparam(struct net_device *ndev, goto err_exit; if (ndev_running) - err = dev_open(ndev, NULL); + err = aq_ndev_open(ndev); err_exit: return err; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index ff245f75fa3d..1401fc4632b5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -53,7 +53,7 @@ struct net_device *aq_ndev_alloc(void) return ndev; } -static int aq_ndev_open(struct net_device *ndev) +int aq_ndev_open(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; @@ -83,7 +83,7 @@ err_exit: return err; } -static int aq_ndev_close(struct net_device *ndev) +int aq_ndev_close(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); 
int err = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.h b/drivers/net/ethernet/aquantia/atlantic/aq_main.h index a5a624b9ce73..2a562ab7a5af 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.h @@ -14,5 +14,7 @@ void aq_ndev_schedule_work(struct work_struct *work); struct net_device *aq_ndev_alloc(void); +int aq_ndev_open(struct net_device *ndev); +int aq_ndev_close(struct net_device *ndev); #endif /* AQ_MAIN_H */ From 9e6b79a3cd17620d467311b30d56f2648f6880aa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Nov 2022 12:36:57 +0100 Subject: [PATCH 1129/1477] wifi: cfg80211: fix buffer overflow in elem comparison MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9f16b5c82a025cd4c864737409234ddc44fb166a ] For vendor elements, the code here assumes that 5 octets are present without checking. Since the element itself is already checked to fit, we only need to check the length. Reported-and-tested-by: Sönke Huster Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 15119c49c093..8102ee7b2047 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -330,7 +330,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, * determine if they are the same ie. 
*/ if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) { - if (!memcmp(tmp_old + 2, tmp + 2, 5)) { + if (tmp_old[1] >= 5 && tmp[1] >= 5 && + !memcmp(tmp_old + 2, tmp + 2, 5)) { /* same vendor ie, copy from * subelement */ From e2ed90fd3ae023a8c15a59c145d8db41ed6bbbd5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Nov 2022 12:36:58 +0100 Subject: [PATCH 1130/1477] wifi: cfg80211: don't allow multi-BSSID in S1G MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit acd3c92acc7aaec50a94d0a7faf7ccd74e952493 ] In S1G beacon frames there shouldn't be multi-BSSID elements since that's not supported, remove that to avoid a potential integer underflow and/or misparsing the frames due to the different length of the fixed part of the frame. While at it, initialize non_tx_data so we don't send garbage values to the user (even if it doesn't seem to matter now.) Reported-and-tested-by: Sönke Huster Fixes: 9eaffe5078ca ("cfg80211: convert S1G beacon to scan results") Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/scan.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 8102ee7b2047..d09dabae5627 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2467,10 +2467,15 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, const struct cfg80211_bss_ies *ies1, *ies2; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - struct cfg80211_non_tx_bss non_tx_data; + struct cfg80211_non_tx_bss non_tx_data = {}; res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, len, gfp); + + /* don't do any further MBSSID handling for S1G */ + if (ieee80211_is_s1g_beacon(mgmt->frame_control)) + return res; + if (!res || !wiphy->support_mbssid || !cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return res; From 0184ede0ec61b9cd075babfaa45081b1bf322234 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: 
Tue, 8 Nov 2022 16:19:26 +0100 Subject: [PATCH 1131/1477] wifi: mac8021: fix possible oob access in ieee80211_get_rate_duration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3e8f7abcc3473bc9603323803aeaed4ffcc3a2ab ] Fix possible out-of-bound access in ieee80211_get_rate_duration routine as reported by the following UBSAN report: UBSAN: array-index-out-of-bounds in net/mac80211/airtime.c:455:47 index 15 is out of range for type 'u16 [12]' CPU: 2 PID: 217 Comm: kworker/u32:10 Not tainted 6.1.0-060100rc3-generic Hardware name: Acer Aspire TC-281/Aspire TC-281, BIOS R01-A2 07/18/2017 Workqueue: mt76 mt76u_tx_status_data [mt76_usb] Call Trace: show_stack+0x4e/0x61 dump_stack_lvl+0x4a/0x6f dump_stack+0x10/0x18 ubsan_epilogue+0x9/0x43 __ubsan_handle_out_of_bounds.cold+0x42/0x47 ieee80211_get_rate_duration.constprop.0+0x22f/0x2a0 [mac80211] ? ieee80211_tx_status_ext+0x32e/0x640 [mac80211] ieee80211_calc_rx_airtime+0xda/0x120 [mac80211] ieee80211_calc_tx_airtime+0xb4/0x100 [mac80211] mt76x02_send_tx_status+0x266/0x480 [mt76x02_lib] mt76x02_tx_status_data+0x52/0x80 [mt76x02_lib] mt76u_tx_status_data+0x67/0xd0 [mt76_usb] process_one_work+0x225/0x400 worker_thread+0x50/0x3e0 ? process_one_work+0x400/0x400 kthread+0xe9/0x110 ? 
kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 Fixes: db3e1c40cf2f ("mac80211: Import airtime calculation code from mt76") Signed-off-by: Lorenzo Bianconi Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/airtime.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 26d2f8ba7029..758ef63669e7 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -457,6 +457,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw, (status->encoding == RX_ENC_HE && streams > 8))) return 0; + if (idx >= MCS_GROUP_RATES) + return 0; + duration = airtime_mcs_groups[group].duration[idx]; duration <<= airtime_mcs_groups[group].shift; *overhead = 36 + (streams << 2); From fe6bc99c27c21348f548966118867ed26a9a372c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 23 Nov 2022 21:28:08 +0800 Subject: [PATCH 1132/1477] net: phy: fix null-ptr-deref while probe() failed [ Upstream commit 369eb2c9f1f72adbe91e0ea8efb130f0a2ba11a6 ] I got a null-ptr-deref report as following when doing fault injection test: BUG: kernel NULL pointer dereference, address: 0000000000000058 Oops: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 1 PID: 253 Comm: 507-spi-dm9051 Tainted: G B N 6.1.0-rc3+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:klist_put+0x2d/0xd0 Call Trace: klist_remove+0xf1/0x1c0 device_release_driver_internal+0x23e/0x2d0 bus_remove_device+0x1bd/0x240 device_del+0x357/0x770 phy_device_remove+0x11/0x30 mdiobus_unregister+0xa5/0x140 release_nodes+0x6a/0xa0 devres_release_all+0xf8/0x150 device_unbind_cleanup+0x19/0xd0 //probe path: phy_device_register() device_add() phy_connect phy_attach_direct() //set device driver probe() //it's failed, driver is not bound device_bind_driver() // probe failed, it's not called //remove path: phy_device_remove() device_del() device_release_driver_internal() 
__device_release_driver() //dev->drv is not NULL klist_remove() <- knode_driver is not added yet, cause null-ptr-deref In phy_attach_direct(), after setting the 'dev->driver', probe() fails, device_bind_driver() is not called, so the knode_driver->n_klist is not set, then it causes null-ptr-deref in __device_release_driver() while deleting device. Fix this by setting dev->driver to NULL in the error path in phy_attach_direct(). Fixes: e13934563db0 ("[PATCH] PHY Layer fixup") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index d2f6d8107595..3ef5aa6b72a7 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1423,6 +1423,7 @@ error: error_module_put: module_put(d->driver->owner); + d->driver = NULL; error_put_device: put_device(d); if (ndev_owner != bus->owner) From b080d4668f3f00298077ab32ec50b1128239d421 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 24 Nov 2022 07:09:17 +0000 Subject: [PATCH 1133/1477] net: net_netdev: Fix error handling in ntb_netdev_init_module() [ Upstream commit b8f79dccd38edf7db4911c353d9cd792ab13a327 ] The ntb_netdev_init_module() returns the ntb_transport_register_client() directly without checking its return value, if ntb_transport_register_client() failed, the NTB client device is not unregistered. Fix by unregister NTB client device when ntb_transport_register_client() failed. Fixes: 548c237c0a99 ("net: Add support for NTB virtual ethernet device") Signed-off-by: Yuan Can Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ntb_netdev.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index a5bab614ff84..1b7d588ff3c5 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -484,7 +484,14 @@ static int __init ntb_netdev_init_module(void) rc = ntb_transport_register_client_dev(KBUILD_MODNAME); if (rc) return rc; - return ntb_transport_register_client(&ntb_netdev_client); + + rc = ntb_transport_register_client(&ntb_netdev_client); + if (rc) { + ntb_transport_unregister_client_dev(KBUILD_MODNAME); + return rc; + } + + return 0; } module_init(ntb_netdev_init_module); From e01c1542379fb395e7da53706df598f38905dfbf Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 24 Nov 2022 16:10:05 +0800 Subject: [PATCH 1134/1477] net/9p: Fix a potential socket leak in p9_socket_open [ Upstream commit dcc14cfd7debe11b825cb077e75d91d2575b4cb8 ] Both p9_fd_create_tcp() and p9_fd_create_unix() will call p9_socket_open(). If the creation of p9_trans_fd fails, p9_fd_create_tcp() and p9_fd_create_unix() will return an error directly instead of releasing the csocket, which will result in a socket leak. This patch adds sock_release() to fix the leak issue. Fixes: 6b18662e239a ("9p connect fixes") Signed-off-by: Wang Hai ACKed-by: Al Viro Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- net/9p/trans_fd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 400219801e63..deb66635f0f3 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -852,8 +852,10 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) struct file *file; p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); - if (!p) + if (!p) { + sock_release(csocket); return -ENOMEM; + } csocket->sk->sk_allocation = GFP_NOIO; file = sock_alloc_file(csocket, 0, NULL); From 45752af0247589e6d3dede577415bfe117b4392c Mon Sep 17 00:00:00 2001 From: Yuri Karpov Date: Thu, 24 Nov 2022 11:43:03 +0300 Subject: [PATCH 1135/1477] net: ethernet: nixge: fix NULL dereference [ Upstream commit 9256db4e45e8b497b0e993cc3ed4ad08eb2389b6 ] In function nixge_hw_dma_bd_release() dereference of NULL pointer priv->rx_bd_v is possible for the case of its allocation failure in nixge_hw_dma_bd_init(). Move for() loop with priv->rx_bd_v dereference under the check for its validity. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 492caffa8a1a ("net: ethernet: nixge: Add support for National Instruments XGE netdev") Signed-off-by: Yuri Karpov Reviewed-by: Maciej Fijalkowski Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ni/nixge.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 9c48fd85c418..07fbd329fe93 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -249,25 +249,26 @@ static void nixge_hw_dma_bd_release(struct net_device *ndev) struct sk_buff *skb; int i; - for (i = 0; i < RX_BD_NUM; i++) { - phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - phys); + if (priv->rx_bd_v) { + for (i = 0; i < RX_BD_NUM; i++) { + phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + phys); - dma_unmap_single(ndev->dev.parent, phys_addr, - NIXGE_MAX_JUMBO_FRAME_SIZE, - DMA_FROM_DEVICE); + dma_unmap_single(ndev->dev.parent, phys_addr, + NIXGE_MAX_JUMBO_FRAME_SIZE, + DMA_FROM_DEVICE); - skb = (struct sk_buff *)(uintptr_t) - nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - sw_id_offset); - dev_kfree_skb(skb); - } + skb = (struct sk_buff *)(uintptr_t) + nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + sw_id_offset); + dev_kfree_skb(skb); + } - if (priv->rx_bd_v) dma_free_coherent(ndev->dev.parent, sizeof(*priv->rx_bd_v) * RX_BD_NUM, priv->rx_bd_v, priv->rx_bd_p); + } if (priv->tx_skb) devm_kfree(ndev->dev.parent, priv->tx_skb); From 4621bdfff5f84e511a7f412cfcfea1e920ae03ed Mon Sep 17 00:00:00 2001 From: Jerry Ray Date: Mon, 28 Nov 2022 13:35:59 -0600 Subject: [PATCH 1136/1477] dsa: lan9303: Correct stat name [ Upstream commit 39f59bca275d2d819a8788c0f962e9e89843efc9 ] This patch changes the reported ethtool statistics for the lan9303 family of parts covered by this driver. The TxUnderRun statistic label is renamed to RxShort to accurately reflect what stat the device is reporting. I did not reorder the statistics as that might cause problems with existing user code that are expecting the stats at a certain offset. 
Fixes: a1292595e006 ("net: dsa: add new DSA switch driver for the SMSC-LAN9303") Signed-off-by: Jerry Ray Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221128193559.6572-1-jerry.ray@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/lan9303-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 2044d440d7de..c79bb8cf962c 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -958,7 +958,7 @@ static const struct lan9303_mib_desc lan9303_mib[] = { { .offset = LAN9303_MAC_TX_BRDCST_CNT_0, .name = "TxBroad", }, { .offset = LAN9303_MAC_TX_PAUSE_CNT_0, .name = "TxPause", }, { .offset = LAN9303_MAC_TX_MULCST_CNT_0, .name = "TxMulti", }, - { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "TxUnderRun", }, + { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "RxShort", }, { .offset = LAN9303_MAC_TX_64_CNT_0, .name = "Tx64Byte", }, { .offset = LAN9303_MAC_TX_127_CNT_0, .name = "Tx128Byte", }, { .offset = LAN9303_MAC_TX_255_CNT_0, .name = "Tx256Byte", }, From a1ba595e35aa3afbe417ff0af353afb9f65559c0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 25 Nov 2022 12:46:43 -0500 Subject: [PATCH 1137/1477] tipc: re-fetch skb cb after tipc_msg_validate [ Upstream commit 3067bc61fcfe3081bf4807ce65560f499e895e77 ] As the call trace shows, the original skb was freed in tipc_msg_validate(), and dereferencing the old skb cb would cause an use-after-free crash. BUG: KASAN: use-after-free in tipc_crypto_rcv_complete+0x1835/0x2240 [tipc] Call Trace: tipc_crypto_rcv_complete+0x1835/0x2240 [tipc] tipc_crypto_rcv+0xd32/0x1ec0 [tipc] tipc_rcv+0x744/0x1150 [tipc] ... 
Allocated by task 47078: kmem_cache_alloc_node+0x158/0x4d0 __alloc_skb+0x1c1/0x270 tipc_buf_acquire+0x1e/0xe0 [tipc] tipc_msg_create+0x33/0x1c0 [tipc] tipc_link_build_proto_msg+0x38a/0x2100 [tipc] tipc_link_timeout+0x8b8/0xef0 [tipc] tipc_node_timeout+0x2a1/0x960 [tipc] call_timer_fn+0x2d/0x1c0 ... Freed by task 47078: tipc_msg_validate+0x7b/0x440 [tipc] tipc_crypto_rcv_complete+0x4b5/0x2240 [tipc] tipc_crypto_rcv+0xd32/0x1ec0 [tipc] tipc_rcv+0x744/0x1150 [tipc] This patch fixes it by re-fetching the skb cb from the newly allocated skb after calling tipc_msg_validate(). Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Reported-by: Shuang Li Signed-off-by: Xin Long Link: https://lore.kernel.org/r/1b1cdba762915325bd8ef9a98d0276eb673df2a5.1669398403.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tipc/crypto.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 6f91b9a306dc..de63d6d41645 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1975,6 +1975,9 @@ rcv: /* Ok, everything's fine, try to synch own keys according to peers' */ tipc_crypto_key_synch(rx, *skb); + /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */ + skb_cb = TIPC_SKB_CB(*skb); + /* Mark skb decrypted */ skb_cb->decrypted = 1; From 7ca81a161e406834a1fdc405fc83a572bd14b8d9 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 25 Nov 2022 15:57:24 +0800 Subject: [PATCH 1138/1477] net: hsr: Fix potential use-after-free [ Upstream commit 7e177d32442b7ed08a9fa61b61724abc548cb248 ] The skb is delivered to netif_rx(), which may free it; after that call, dereferencing skb may trigger a use-after-free.
Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20221125075724.27912-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/hsr/hsr_forward.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 908324b46328..cb9b54a7abd2 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -303,17 +303,18 @@ static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, struct hsr_node *node_src) { bool was_multicast_frame; - int res; + int res, recv_len; was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST); hsr_addr_subst_source(node_src, skb); skb_pull(skb, ETH_HLEN); + recv_len = skb->len; res = netif_rx(skb); if (res == NET_RX_DROP) { dev->stats.rx_dropped++; } else { dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += recv_len; if (was_multicast_frame) dev->stats.multicast++; } From 5fa0fc5876b5979febf6d7c9906afde4489e5c81 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 28 Nov 2022 22:02:56 +0000 Subject: [PATCH 1139/1477] afs: Fix fileserver probe RTT handling [ Upstream commit ca57f02295f188d6c65ec02202402979880fa6d8 ] The fileserver probing code attempts to work out the best fileserver to use for a volume by retrieving the RTT calculated by AF_RXRPC for the probe call sent to each server and comparing them. Sometimes, however, no RTT estimate is available and rxrpc_kernel_get_srtt() returns false, leading good fileservers to be given an RTT of UINT_MAX and thus causing the rotation algorithm to ignore them. Fix afs_select_fileserver() to ignore rxrpc_kernel_get_srtt()'s return value and just take the estimated RTT it provides - which will be capped at 1 second. 
Fixes: 1d4adfaf6574 ("rxrpc: Make rxrpc_kernel_get_srtt() indicate validity") Signed-off-by: David Howells Reviewed-by: Marc Dionne Tested-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/166965503999.3392585.13954054113218099395.stgit@warthog.procyon.org.uk/ Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/afs/fs_probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index e7e98ad63a91..04d42e49fc59 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -161,8 +161,8 @@ responded: } } - if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && - rtt_us < server->probe.rtt) { + rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; server->rtt = rtt_us; alist->preferred = index; From 5f442e1d403e0496bacb74a58e2be7f500695e6f Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Fri, 25 Nov 2022 02:51:34 +0900 Subject: [PATCH 1140/1477] net: tun: Fix use-after-free in tun_detach() [ Upstream commit 5daadc86f27ea4d691e2131c04310d0418c6cd12 ] syzbot reported use-after-free in tun_detach() [1]. 
This causes call trace like below: ================================================================== BUG: KASAN: use-after-free in notifier_call_chain+0x1ee/0x200 kernel/notifier.c:75 Read of size 8 at addr ffff88807324e2a8 by task syz-executor.0/3673 CPU: 0 PID: 3673 Comm: syz-executor.0 Not tainted 6.1.0-rc5-syzkaller-00044-gcc675d22e422 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:284 [inline] print_report+0x15e/0x461 mm/kasan/report.c:395 kasan_report+0xbf/0x1f0 mm/kasan/report.c:495 notifier_call_chain+0x1ee/0x200 kernel/notifier.c:75 call_netdevice_notifiers_info+0x86/0x130 net/core/dev.c:1942 call_netdevice_notifiers_extack net/core/dev.c:1983 [inline] call_netdevice_notifiers net/core/dev.c:1997 [inline] netdev_wait_allrefs_any net/core/dev.c:10237 [inline] netdev_run_todo+0xbc6/0x1100 net/core/dev.c:10351 tun_detach drivers/net/tun.c:704 [inline] tun_chr_close+0xe4/0x190 drivers/net/tun.c:3467 __fput+0x27c/0xa90 fs/file_table.c:320 task_work_run+0x16f/0x270 kernel/task_work.c:179 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xb3d/0x2a30 kernel/exit.c:820 do_group_exit+0xd4/0x2a0 kernel/exit.c:950 get_signal+0x21b1/0x2440 kernel/signal.c:2858 arch_do_signal_or_restart+0x86/0x2300 arch/x86/kernel/signal.c:869 exit_to_user_mode_loop kernel/entry/common.c:168 [inline] exit_to_user_mode_prepare+0x15f/0x250 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296 do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd The cause of the issue is that sock_put() from __tun_detach() drops last reference count for struct net, and then notifier_call_chain() from netdev_state_change() accesses that struct net. 
This patch fixes the issue by calling sock_put() from tun_detach() after all necessary accesses to the struct net have been done. Fixes: 83c1f36f9880 ("tun: send netlink notification when the device is modified") Reported-by: syzbot+106f9b687cd64ee70cd1@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=96eb7f1ce75ef933697f24eeab928c4a716edefe [1] Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/r/20221124175134.1589053-1-syoshida@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/tun.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index cb42fdbfeb32..67ce7b779af6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -698,7 +698,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (tun) xdp_rxq_info_unreg(&tfile->xdp_rxq); ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); - sock_put(&tfile->sk); } } @@ -714,6 +713,9 @@ static void tun_detach(struct tun_file *tfile, bool clean) if (dev) netdev_state_change(dev); rtnl_unlock(); + + if (clean) + sock_put(&tfile->sk); } static void tun_detach_all(struct net_device *dev) From 1c38c88acc9688e9e379c26fb64bdfbb853618d1 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 28 Nov 2022 11:18:12 -0500 Subject: [PATCH 1141/1477] packet: do not set TP_STATUS_CSUM_VALID on CHECKSUM_COMPLETE [ Upstream commit b85f628aa158a653c006e9c1405a117baef8c868 ] CHECKSUM_COMPLETE signals that skb->csum stores the sum over the entire packet. It does not imply that an embedded l4 checksum field has been validated.
Fixes: 682f048bd494 ("af_packet: pass checksum validation status to the user") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20221128161812.640098-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/packet/af_packet.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b70b06e312bd..eaa030e2ad55 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2243,8 +2243,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) status |= TP_STATUS_CSUM_VALID; if (snaplen > res) @@ -3480,8 +3479,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) aux.tp_status |= TP_STATUS_CSUM_VALID; aux.tp_len = origlen; From 176ee6c673ccd118e9392fd2dbb165423bdb99ca Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Sat, 26 Nov 2022 11:17:20 +0800 Subject: [PATCH 1142/1477] sctp: fix memory leak in sctp_stream_outq_migrate() [ Upstream commit 9ed7bfc79542119ac0a9e1ce8a2a5285e43433e9 ] When sctp_stream_outq_migrate() is called to release stream out resources, the memory pointed to by prio_head in stream out is not released. The memory leak information is as follows: unreferenced object 0xffff88801fe79f80 (size 64): comm "sctp_repo", pid 7957, jiffies 4294951704 (age 36.480s) hex dump (first 32 bytes): 80 9f e7 1f 80 88 ff ff 80 9f e7 1f 80 88 ff ff ................ 90 9f e7 1f 80 88 ff ff 90 9f e7 1f 80 88 ff ff ................ 
backtrace: [] kmalloc_trace+0x26/0x60 [] sctp_sched_prio_set+0x4cc/0x770 [] sctp_stream_init_ext+0xd2/0x1b0 [] sctp_sendmsg_to_asoc+0x1614/0x1a30 [] sctp_sendmsg+0xda1/0x1ef0 [] inet_sendmsg+0x9d/0xe0 [] sock_sendmsg+0xd3/0x120 [] __sys_sendto+0x23a/0x340 [] __x64_sys_sendto+0xe1/0x1b0 [] do_syscall_64+0x39/0xb0 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://syzkaller.appspot.com/bug?exrid=29c402e56c4760763cc0 Fixes: 637784ade221 ("sctp: introduce priority based stream scheduler") Reported-by: syzbot+29c402e56c4760763cc0@syzkaller.appspotmail.com Signed-off-by: Zhengchao Shao Reviewed-by: Xin Long Link: https://lore.kernel.org/r/20221126031720.378562-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/sctp/stream_sched.h | 2 ++ net/sctp/stream.c | 25 ++++++++++++++++++------- net/sctp/stream_sched.c | 5 +++++ net/sctp/stream_sched_prio.c | 19 +++++++++++++++++++ net/sctp/stream_sched_rr.c | 5 +++++ 5 files changed, 49 insertions(+), 7 deletions(-) diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 01a70b27e026..65058faea4db 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -26,6 +26,8 @@ struct sctp_sched_ops { int (*init)(struct sctp_stream *stream); /* Init a stream */ int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); + /* free a stream */ + void (*free_sid)(struct sctp_stream *stream, __u16 sid); /* Frees the entire thing */ void (*free)(struct sctp_stream *stream); diff --git a/net/sctp/stream.c b/net/sctp/stream.c index ef9fceadef8d..ee6514af830f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -52,6 +52,19 @@ static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) } } +static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_sched_ops *sched; + + if (!SCTP_SO(stream, sid)->ext) + return; + + sched = sctp_sched_ops_from_stream(stream); + 
sched->free_sid(stream, sid); + kfree(SCTP_SO(stream, sid)->ext); + SCTP_SO(stream, sid)->ext = NULL; +} + /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. @@ -70,16 +83,14 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, * sctp_stream_update will swap ->out pointers. */ for (i = 0; i < outcnt; i++) { - kfree(SCTP_SO(new, i)->ext); + sctp_stream_free_ext(new, i); SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext; SCTP_SO(stream, i)->ext = NULL; } } - for (i = outcnt; i < stream->outcnt; i++) { - kfree(SCTP_SO(stream, i)->ext); - SCTP_SO(stream, i)->ext = NULL; - } + for (i = outcnt; i < stream->outcnt; i++) + sctp_stream_free_ext(stream, i); } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, @@ -174,9 +185,9 @@ void sctp_stream_free(struct sctp_stream *stream) struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i; - sched->free(stream); + sched->unsched_all(stream); for (i = 0; i < stream->outcnt; i++) - kfree(SCTP_SO(stream, i)->ext); + sctp_stream_free_ext(stream, i); genradix_free(&stream->out); genradix_free(&stream->in); } diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index a2e1d34f52c5..33c2630c2496 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -46,6 +46,10 @@ static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } +static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + static void sctp_sched_fcfs_free(struct sctp_stream *stream) { } @@ -96,6 +100,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = { .get = sctp_sched_fcfs_get, .init = sctp_sched_fcfs_init, .init_sid = sctp_sched_fcfs_init_sid, + .free_sid = sctp_sched_fcfs_free_sid, .free = sctp_sched_fcfs_free, .enqueue = sctp_sched_fcfs_enqueue, .dequeue = sctp_sched_fcfs_dequeue, diff --git a/net/sctp/stream_sched_prio.c 
b/net/sctp/stream_sched_prio.c index 80b5a2c4cbc7..4fc9f2923ed1 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c @@ -204,6 +204,24 @@ static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid, return sctp_sched_prio_set(stream, sid, 0, gfp); } +static void sctp_sched_prio_free_sid(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_stream_priorities *prio = SCTP_SO(stream, sid)->ext->prio_head; + int i; + + if (!prio) + return; + + SCTP_SO(stream, sid)->ext->prio_head = NULL; + for (i = 0; i < stream->outcnt; i++) { + if (SCTP_SO(stream, i)->ext && + SCTP_SO(stream, i)->ext->prio_head == prio) + return; + } + + kfree(prio); +} + static void sctp_sched_prio_free(struct sctp_stream *stream) { struct sctp_stream_priorities *prio, *n; @@ -323,6 +341,7 @@ static struct sctp_sched_ops sctp_sched_prio = { .get = sctp_sched_prio_get, .init = sctp_sched_prio_init, .init_sid = sctp_sched_prio_init_sid, + .free_sid = sctp_sched_prio_free_sid, .free = sctp_sched_prio_free, .enqueue = sctp_sched_prio_enqueue, .dequeue = sctp_sched_prio_dequeue, diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index ff425aed62c7..cc444fe0d67c 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c @@ -90,6 +90,10 @@ static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } +static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + static void sctp_sched_rr_free(struct sctp_stream *stream) { sctp_sched_rr_unsched_all(stream); @@ -177,6 +181,7 @@ static struct sctp_sched_ops sctp_sched_rr = { .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, .init_sid = sctp_sched_rr_init_sid, + .free_sid = sctp_sched_rr_free_sid, .free = sctp_sched_rr_free, .enqueue = sctp_sched_rr_enqueue, .dequeue = sctp_sched_rr_dequeue, From d93522d04f84c81ab9af899957fddfd3c7eb0bb6 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 28 Nov 2022 15:56:04 +0900 Subject: [PATCH 
1143/1477] net: ethernet: renesas: ravb: Fix promiscuous mode after system resumed [ Upstream commit d66233a312ec9013af3e37e4030b479a20811ec3 ] After system resumed on some environment board, the promiscuous mode is disabled because the SoC turned off. So, call ravb_set_rx_mode() in the ravb_resume() to fix the issue. Reported-by: Tho Vu Fixes: 0184165b2f42 ("ravb: add sleep PM suspend/resume support") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Pavan Chebbi Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20221128065604.1864391-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index f96eed67e1a2..9e7b85e178fd 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2364,6 +2364,7 @@ static int __maybe_unused ravb_resume(struct device *dev) ret = ravb_open(ndev); if (ret < 0) return ret; + ravb_set_rx_mode(ndev); netif_device_attach(ndev); } From f06e0cd01eab954bd5f2190c9faa79bb5357e05b Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Thu, 17 Nov 2022 11:23:13 -0500 Subject: [PATCH 1144/1477] hwmon: (coretemp) Check for null before removing sysfs attrs [ Upstream commit a89ff5f5cc64b9fe7a992cf56988fd36f56ca82a ] If coretemp_add_core() gets an error then pdata->core_data[indx] is already NULL and has been kfreed. Don't pass that to sysfs_remove_group() as that will crash in sysfs_remove_group(). 
[Shortened for readability] [91854.020159] sysfs: cannot create duplicate filename '/devices/platform/coretemp.0/hwmon/hwmon2/temp20_label' [91855.126115] BUG: kernel NULL pointer dereference, address: 0000000000000188 [91855.165103] #PF: supervisor read access in kernel mode [91855.194506] #PF: error_code(0x0000) - not-present page [91855.224445] PGD 0 P4D 0 [91855.238508] Oops: 0000 [#1] PREEMPT SMP PTI ... [91855.342716] RIP: 0010:sysfs_remove_group+0xc/0x80 ... [91855.796571] Call Trace: [91855.810524] coretemp_cpu_offline+0x12b/0x1dd [coretemp] [91855.841738] ? coretemp_cpu_online+0x180/0x180 [coretemp] [91855.871107] cpuhp_invoke_callback+0x105/0x4b0 [91855.893432] cpuhp_thread_fun+0x8e/0x150 ... Fix this by checking for NULL first. Signed-off-by: Phil Auld Cc: linux-hwmon@vger.kernel.org Cc: Fenghua Yu Cc: Jean Delvare Cc: Guenter Roeck Link: https://lore.kernel.org/r/20221117162313.3164803-1-pauld@redhat.com Fixes: 199e0de7f5df3 ("hwmon: (coretemp) Merge pkgtemp with coretemp") Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/coretemp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 032129292957..9b49bfc63ffc 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -533,6 +533,10 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx) { struct temp_data *tdata = pdata->core_data[indx]; + /* if we errored on add then this is already gone */ + if (!tdata) + return; + /* Remove the sysfs attributes */ sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group); From c40db1e5f316792b557d2be37e447c20d9ac4635 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 18 Nov 2022 17:33:03 +0800 Subject: [PATCH 1145/1477] hwmon: (coretemp) fix pci device refcount leak in nv1a_ram_new() [ Upstream commit 7dec14537c5906b8bf40fd6fd6d9c3850f8df11d ] As comment of pci_get_domain_bus_and_slot() says, it returns a pci device with refcount 
increment, when finish using it, the caller must decrement the reference count by calling pci_dev_put(). So call it after using to avoid refcount leak. Fixes: 14513ee696a0 ("hwmon: (coretemp) Use PCI host bridge ID to identify CPU if necessary") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221118093303.214163-1-yangyingliang@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/coretemp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 9b49bfc63ffc..42b84ebff057 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -242,10 +242,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) */ if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) { for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) { - if (host_bridge->device == tjmax_pci_table[i].device) + if (host_bridge->device == tjmax_pci_table[i].device) { + pci_dev_put(host_bridge); return tjmax_pci_table[i].tjmax; + } } } + pci_dev_put(host_bridge); for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) { if (strstr(c->x86_model_id, tjmax_table[i].id)) From 2b1d8f27e2051d9d35043edbf2e3f4e1354edf59 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 10 Nov 2022 21:47:07 +0800 Subject: [PATCH 1146/1477] net/mlx5: DR, Fix uninitialized var warning [ Upstream commit 52f7cf70eb8fac6111786c59ae9dfc5cf2bee710 ] Smatch warns this: drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c:81 mlx5dr_table_set_miss_action() error: uninitialized symbol 'ret'. Initializing ret with -EOPNOTSUPP and fix missing action case. 
Fixes: 7838e1725394 ("net/mlx5: DR, Expose steering table functionality") Signed-off-by: YueHaibing Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index b599b6beb5b9..6a4b997c258a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -9,7 +9,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, struct mlx5dr_matcher *last_matcher = NULL; struct mlx5dr_htbl_connect_info info; struct mlx5dr_ste_htbl *last_htbl; - int ret; + int ret = -EOPNOTSUPP; if (action && action->action_type != DR_ACTION_TYP_FT) return -EOPNOTSUPP; @@ -68,6 +68,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, } } + if (ret) + goto out; + /* Release old action */ if (tbl->miss_action) refcount_dec(&tbl->miss_action->refcount); From 26b6f927bb86bf32b081866851688ad2cdfc9472 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 3 Nov 2022 01:02:54 +0800 Subject: [PATCH 1147/1477] riscv: vdso: fix section overlapping under some conditions commit 74f6bb55c834da6d4bac24f44868202743189b2b upstream. 
lkp reported a build error, I tried the config and can reproduce build error as below: VDSOLD arch/riscv/kernel/vdso/vdso.so.dbg ld.lld: error: section .note file range overlaps with .text >>> .note range is [0x7C8, 0x803] >>> .text range is [0x800, 0x1993] ld.lld: error: section .text file range overlaps with .dynamic >>> .text range is [0x800, 0x1993] >>> .dynamic range is [0x808, 0x937] ld.lld: error: section .note virtual address range overlaps with .text >>> .note range is [0x7C8, 0x803] >>> .text range is [0x800, 0x1993] Fix it by setting DISABLE_BRANCH_PROFILING which will disable branch tracing for vdso, thus avoid useless _ftrace_annotated_branch section and _ftrace_branch section. Although we can also fix it by removing the hardcoded .text begin address, but I think that's another story and should be put into another patch. Link: https://lore.kernel.org/lkml/202210122123.Cc4FPShJ-lkp@intel.com/#r Reported-by: kernel test robot Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20221102170254.1925-1-jszhang@kernel.org Fixes: ad5d1122b82f ("riscv: use vDSO common flow to reduce the latency of the time-related functions") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/vdso/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index c92b55a0ec1c..f4ac7ff56bce 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -17,6 +17,7 @@ vdso-syms += flush_icache obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ccflags-y := -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) From c099d12c5502b3eff5dd7b22815e480ff9aefe16 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 21 Nov 2022 10:44:03 -0500 Subject: [PATCH 1148/1477] error-injection: Add prompt for function error 
injection commit a4412fdd49dc011bcc2c0d81ac4cab7457092650 upstream. The config to be able to inject error codes into any function annotated with ALLOW_ERROR_INJECTION() is enabled when FUNCTION_ERROR_INJECTION is enabled. But unfortunately, this is always enabled on x86 when KPROBES is enabled, and there's no way to turn it off. As kprobes is useful for observability of the kernel, it is useful to have it enabled in production environments. But error injection should be avoided. Add a prompt to the config to allow it to be disabled even when kprobes is enabled, and get rid of the "def_bool y". This is a kernel debug feature (it's in Kconfig.debug), and should have never been something enabled by default. Cc: stable@vger.kernel.org Fixes: 540adea3809f6 ("error-injection: Separate error-injection from kprobe") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig.debug | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ce796ca869c2..6b0649d8bca7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1801,8 +1801,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT If unsure, say N. config FUNCTION_ERROR_INJECTION - def_bool y + bool "Fault-injections of functions" depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + help + Add fault injections into various functions that are annotated with + ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return + value of these functions. This is useful to test error paths of code. + + If unsure, say N config FAULT_INJECTION bool "Fault-injection framework" From 6ddf788400dd3f5b75f807164bf91a59ee6b3b32 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 19 Nov 2022 10:36:59 +0800 Subject: [PATCH 1149/1477] tools/vm/slabinfo-gnuplot: use "grep -E" instead of "egrep" commit a435874bf626f55d7147026b059008c8de89fbb8 upstream.
The latest version of grep claims the egrep is now obsolete so the build now contains warnings that look like: egrep: warning: egrep is obsolescent; using grep -E fix this up by moving the related file to use "grep -E" instead. sed -i "s/egrep/grep -E/g" `grep egrep -rwl tools/vm` Here are the steps to install the latest grep: wget http://ftp.gnu.org/gnu/grep/grep-3.8.tar.gz tar xf grep-3.8.tar.gz cd grep-3.8 && ./configure && make sudo make install export PATH=/usr/local/bin:$PATH Link: https://lkml.kernel.org/r/1668825419-30584-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Reviewed-by: Sergey Senozhatsky Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- tools/vm/slabinfo-gnuplot.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh index 26e193ffd2a2..873a892147e5 100644 --- a/tools/vm/slabinfo-gnuplot.sh +++ b/tools/vm/slabinfo-gnuplot.sh @@ -150,7 +150,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-loss" `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" @@ -159,7 +159,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-size" `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" From e858917ab785afe83c14f5ac141301216ccda847 Mon Sep 17 00:00:00 2001 From: ZhangPeng Date: Sat, 19 Nov 2022 21:05:42 +0900 Subject: [PATCH 1150/1477] nilfs2: fix NULL pointer dereference in nilfs_palloc_commit_free_entry() commit f0a0ccda18d6fd826d7c7e7ad48a6ed61c20f8b4 upstream. Syzbot reported a null-ptr-deref bug: NILFS (loop0): segctord starting. 
Construction interval = 5 seconds, CP frequency < 30 seconds general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 1 PID: 3603 Comm: segctord Not tainted 6.1.0-rc2-syzkaller-00105-gb229b6ca5abb #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 10/11/2022 RIP: 0010:nilfs_palloc_commit_free_entry+0xe5/0x6b0 fs/nilfs2/alloc.c:608 Code: 00 00 00 00 fc ff df 80 3c 02 00 0f 85 cd 05 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8b 73 08 49 8d 7e 10 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 26 05 00 00 49 8b 46 10 be a6 00 00 00 48 c7 c7 RSP: 0018:ffffc90003dff830 EFLAGS: 00010212 RAX: dffffc0000000000 RBX: ffff88802594e218 RCX: 000000000000000d RDX: 0000000000000002 RSI: 0000000000002000 RDI: 0000000000000010 RBP: ffff888071880222 R08: 0000000000000005 R09: 000000000000003f R10: 000000000000000d R11: 0000000000000000 R12: ffff888071880158 R13: ffff88802594e220 R14: 0000000000000000 R15: 0000000000000004 FS: 0000000000000000(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fb1c08316a8 CR3: 0000000018560000 CR4: 0000000000350ee0 Call Trace: nilfs_dat_commit_free fs/nilfs2/dat.c:114 [inline] nilfs_dat_commit_end+0x464/0x5f0 fs/nilfs2/dat.c:193 nilfs_dat_commit_update+0x26/0x40 fs/nilfs2/dat.c:236 nilfs_btree_commit_update_v+0x87/0x4a0 fs/nilfs2/btree.c:1940 nilfs_btree_commit_propagate_v fs/nilfs2/btree.c:2016 [inline] nilfs_btree_propagate_v fs/nilfs2/btree.c:2046 [inline] nilfs_btree_propagate+0xa00/0xd60 fs/nilfs2/btree.c:2088 nilfs_bmap_propagate+0x73/0x170 fs/nilfs2/bmap.c:337 nilfs_collect_file_data+0x45/0xd0 fs/nilfs2/segment.c:568 nilfs_segctor_apply_buffers+0x14a/0x470 fs/nilfs2/segment.c:1018 nilfs_segctor_scan_file+0x3f4/0x6f0 fs/nilfs2/segment.c:1067 nilfs_segctor_collect_blocks fs/nilfs2/segment.c:1197 [inline] nilfs_segctor_collect 
fs/nilfs2/segment.c:1503 [inline] nilfs_segctor_do_construct+0x12fc/0x6af0 fs/nilfs2/segment.c:2045 nilfs_segctor_construct+0x8e3/0xb30 fs/nilfs2/segment.c:2379 nilfs_segctor_thread_construct fs/nilfs2/segment.c:2487 [inline] nilfs_segctor_thread+0x3c3/0xf30 fs/nilfs2/segment.c:2570 kthread+0x2e4/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 ... If DAT metadata file is corrupted on disk, there is a case where req->pr_desc_bh is NULL and blocknr is 0 at nilfs_dat_commit_end() during a b-tree operation that cascadingly updates ancestor nodes of the b-tree, because nilfs_dat_commit_alloc() for a lower level block can initialize the blocknr on the same DAT entry between nilfs_dat_prepare_end() and nilfs_dat_commit_end(). If this happens, nilfs_dat_commit_end() calls nilfs_dat_commit_free() without valid buffer heads in req->pr_desc_bh and req->pr_bitmap_bh, and causes the NULL pointer dereference above in nilfs_palloc_commit_free_entry() function, which leads to a crash. Fix this by adding a NULL check on req->pr_desc_bh and req->pr_bitmap_bh before nilfs_palloc_commit_free_entry() in nilfs_dat_commit_free(). This also calls nilfs_error() in that case to notify that there is a fatal flaw in the filesystem metadata and prevent further operations. 
Link: https://lkml.kernel.org/r/00000000000097c20205ebaea3d6@google.com Link: https://lkml.kernel.org/r/20221114040441.1649940-1-zhangpeng362@huawei.com Link: https://lkml.kernel.org/r/20221119120542.17204-1-konishi.ryusuke@gmail.com Signed-off-by: ZhangPeng Signed-off-by: Ryusuke Konishi Reported-by: syzbot+ebe05ee8e98f755f61d0@syzkaller.appspotmail.com Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/dat.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 1a3d183027b9..8fedc7104320 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat, kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); + + if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { + nilfs_error(dat->i_sb, + "state inconsistency probably due to duplicate use of vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + return; + } nilfs_palloc_commit_free_entry(dat, req); } From 471fb7b735bf9dd1caf2c8751158b81a3d9a5584 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 30 Nov 2022 07:25:51 -0800 Subject: [PATCH 1151/1477] x86/bugs: Make sure MSR_SPEC_CTRL is updated properly upon resume from S3 commit 66065157420c5b9b3f078f43d313c153e1ff7f83 upstream. The "force" argument to write_spec_ctrl_current() is currently ambiguous as it does not guarantee the MSR write. This is due to the optimization that writes to the MSR happen only when the new value differs from the cached value. This is fine in most cases, but breaks for S3 resume when the cached MSR value gets out of sync with the hardware MSR value due to S3 resetting it. When x86_spec_ctrl_current is same as x86_spec_ctrl_base, the MSR write is skipped. Which results in SPEC_CTRL mitigations not getting restored. Move the MSR write from write_spec_ctrl_current() to a new function that unconditionally writes to the MSR. 
Update the callers accordingly and rename functions. [ bp: Rework a bit. ] Fixes: caa0ff24d5d0 ("x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value") Suggested-by: Borislav Petkov Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Cc: Link: https://lore.kernel.org/r/806d39b0bfec2fe8f50dc5446dff20f5bb24a959.1669821572.git.pawan.kumar.gupta@linux.intel.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 +- arch/x86/kernel/cpu/bugs.c | 21 ++++++++++++++------- arch/x86/kernel/process.c | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 07f5030073bb..f14cdf951249 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -310,7 +310,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; DECLARE_PER_CPU(u64, x86_spec_ctrl_current); -extern void write_spec_ctrl_current(u64 val, bool force); +extern void update_spec_ctrl_cond(u64 val); extern u64 spec_ctrl_current(void); /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a300a19255b6..e2e22a5740a4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -59,11 +59,18 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); static DEFINE_MUTEX(spec_ctrl_mutex); +/* Update SPEC_CTRL MSR and its cached copy unconditionally */ +static void update_spec_ctrl(u64 val) +{ + this_cpu_write(x86_spec_ctrl_current, val); + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + /* * Keep track of the SPEC_CTRL MSR value for the current task, which may differ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). 
*/ -void write_spec_ctrl_current(u64 val, bool force) +void update_spec_ctrl_cond(u64 val) { if (this_cpu_read(x86_spec_ctrl_current) == val) return; @@ -74,7 +81,7 @@ void write_spec_ctrl_current(u64 val, bool force) * When KERNEL_IBRS this MSR is written on return-to-user, unless * forced the update can be delayed until that time. */ - if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) wrmsrl(MSR_IA32_SPEC_CTRL, val); } @@ -1291,7 +1298,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) if (ia32_cap & ARCH_CAP_RRSBA) { x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -1413,7 +1420,7 @@ static void __init spectre_v2_select_mitigation(void) if (spectre_v2_in_ibrs_mode(mode)) { x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } switch (mode) { @@ -1527,7 +1534,7 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); - write_spec_ctrl_current(val, true); + update_spec_ctrl(val); } /* Update x86_spec_ctrl_base in case SMT state changed. 
*/ @@ -1760,7 +1767,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -1978,7 +1985,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4505d845daba..383afcc1098b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -556,7 +556,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - write_spec_ctrl_current(msr, false); + update_spec_ctrl_cond(msr); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) From 549e24409ac579b4ad871d43d8781a1dc9b4aaeb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Nov 2022 00:29:26 +0200 Subject: [PATCH 1152/1477] pinctrl: intel: Save and restore pins in "direct IRQ" mode commit 6989ea4881c8944fbf04378418bb1af63d875ef8 upstream. The firmware on some systems may configure GPIO pins to be an interrupt source in so called "direct IRQ" mode. In such cases the GPIO controller driver has no idea if those pins are being used or not. At the same time, there is a known bug in the firmwares that don't restore the pin settings correctly after suspend, i.e. by an unknown reason the Rx value becomes inverted. Hence, let's save and restore the pins that are configured as GPIOs in the input mode with GPIROUTIOXAPIC bit set. 
Cc: stable@vger.kernel.org Reported-and-tested-by: Dale Smith Reported-and-tested-by: John Harris BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=214749 Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Link: https://lore.kernel.org/r/20221124222926.72326-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-intel.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 4de832ac47d3..db9087c129c0 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -426,9 +426,14 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static int __intel_gpio_get_gpio_mode(u32 value) +{ + return (value & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; +} + static int intel_gpio_get_gpio_mode(void __iomem *padcfg0) { - return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; + return __intel_gpio_get_gpio_mode(readl(padcfg0)); } static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) @@ -1604,6 +1609,7 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data); static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin) { const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin); + u32 value; if (!pd || !intel_pad_usable(pctrl, pin)) return false; @@ -1618,6 +1624,25 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int gpiochip_line_is_irq(&pctrl->chip, intel_pin_to_gpio(pctrl, pin))) return true; + /* + * The firmware on some systems may configure GPIO pins to be + * an interrupt source in so called "direct IRQ" mode. In such + * cases the GPIO controller driver has no idea if those pins + * are being used or not. 
At the same time, there is a known bug + * in the firmwares that don't restore the pin settings correctly + * after suspend, i.e. by an unknown reason the Rx value becomes + * inverted. + * + * Hence, let's save and restore the pins that are configured + * as GPIOs in the input mode with GPIROUTIOXAPIC bit set. + * + * See https://bugzilla.kernel.org/show_bug.cgi?id=214749. + */ + value = readl(intel_get_padcfg(pctrl, pin, PADCFG0)); + if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && + (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO)) + return true; + return false; } From d4fc344c0d9c06ea1c1fafa69efac4fed9cd0dd2 Mon Sep 17 00:00:00 2001 From: "Goh, Wei Sheng" Date: Wed, 23 Nov 2022 18:51:10 +0800 Subject: [PATCH 1153/1477] net: stmmac: Set MAC's flow control register to reflect current settings commit cc3d2b5fc0d6f8ad8a52da5ea679e5c2ec2adbd4 upstream. Currently, pause frame register GMAC_RX_FLOW_CTRL_RFE is not updated correctly when 'ethtool -A autoneg off rx off tx off' command is issued. This fix ensures the flow control change is reflected directly in the GMAC_RX_FLOW_CTRL_RFE register. Fixes: 46f69ded988d ("net: stmmac: Use resolved link config in mac_link_up()") Cc: # 5.10.x Signed-off-by: Goh, Wei Sheng Signed-off-by: Noor Azura Ahmad Tarmizi Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 ++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 2e71e510e127..5b052fdd2696 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -720,6 +720,8 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, if (fc & FLOW_RX) { pr_debug("\tReceive Flow-Control ON\n"); flow |= GMAC_RX_FLOW_CTRL_RFE; + } else { + pr_debug("\tReceive Flow-Control OFF\n"); } writel(flow, ioaddr + GMAC_RX_FLOW_CTRL); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 41e71a26b1ad..14ea0168b548 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1043,8 +1043,16 @@ static void stmmac_mac_link_up(struct phylink_config *config, ctrl |= priv->hw->link.duplex; /* Flow Control operation */ - if (tx_pause && rx_pause) - stmmac_mac_flow_ctrl(priv, duplex); + if (rx_pause && tx_pause) + priv->flow_ctrl = FLOW_AUTO; + else if (rx_pause && !tx_pause) + priv->flow_ctrl = FLOW_RX; + else if (!rx_pause && tx_pause) + priv->flow_ctrl = FLOW_TX; + else + priv->flow_ctrl = FLOW_OFF; + + stmmac_mac_flow_ctrl(priv, duplex); writel(ctrl, priv->ioaddr + MAC_CTRL_REG); From b79be962b567e857470b454d564baea5014ee94b Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 23 Nov 2022 17:55:06 +0800 Subject: [PATCH 1154/1477] mmc: mmc_test: Fix removal of debugfs file commit f4307b4df1c28842bb1950ff0e1b97e17031b17f upstream. In __mmc_test_register_dbgfs_file(), we need to assign 'file', as it's being used when removing the debugfs files when the mmc_test module is removed. 
Fixes: a04c50aaa916 ("mmc: core: no need to check return value of debugfs_create functions") Signed-off-by: Ye Bin Acked-by: Adrian Hunter Cc: stable@vger.kernel.org [Ulf: Re-wrote the commit msg] Link: https://lore.kernel.org/r/20221123095506.1965691-1-yebin@huaweicloud.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 152e7525ed33..b9b6f000154b 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -3195,7 +3195,8 @@ static int __mmc_test_register_dbgfs_file(struct mmc_card *card, struct mmc_test_dbgfs_file *df; if (card->debugfs_root) - debugfs_create_file(name, mode, card->debugfs_root, card, fops); + file = debugfs_create_file(name, mode, card->debugfs_root, + card, fops); df = kmalloc(sizeof(*df), GFP_KERNEL); if (!df) { From 46ee041cd655edfb43bb9c867fdb9e2151c2f52c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=B6hle?= Date: Thu, 17 Nov 2022 14:42:09 +0000 Subject: [PATCH 1155/1477] mmc: core: Fix ambiguous TRIM and DISCARD arg commit 489d144563f23911262a652234b80c70c89c978b upstream. Clean up the MMC_TRIM_ARGS define that became ambiguous with DISCARD introduction. While at it, let's fix one usage where MMC_TRIM_ARGS falsely included DISCARD too. 
Fixes: b3bf915308ca ("mmc: core: new discard feature support at eMMC v4.5") Signed-off-by: Christian Loehle Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/11376b5714964345908f3990f17e0701@hyperstone.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/core.c | 9 +++++++-- include/linux/mmc/mmc.h | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 7d9ec91e081b..8f2465394253 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1539,6 +1539,11 @@ void mmc_init_erase(struct mmc_card *card) card->pref_erase = 0; } +static bool is_trim_arg(unsigned int arg) +{ + return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG; +} + static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, unsigned int arg, unsigned int qty) { @@ -1837,7 +1842,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)) return -EOPNOTSUPP; - if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) && + if (mmc_card_mmc(card) && is_trim_arg(arg) && !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN)) return -EOPNOTSUPP; @@ -1867,7 +1872,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, * identified by the card->eg_boundary flag. 
*/ rem = card->erase_size - (from % card->erase_size); - if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) { + if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) { err = mmc_do_erase(card, from, from + rem - 1, arg); from += rem; if ((err) || (to <= from)) diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index d9a65c6a8816..545578fb814b 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -445,7 +445,7 @@ static inline bool mmc_ready_for_data(u32 status) #define MMC_SECURE_TRIM1_ARG 0x80000001 #define MMC_SECURE_TRIM2_ARG 0x80008000 #define MMC_SECURE_ARGS 0x80000000 -#define MMC_TRIM_ARGS 0x00008001 +#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003 #define mmc_driver_type_mask(n) (1 << (n)) From ef767907e77d00f46d0b59e5530b4f2e2d34f5ba Mon Sep 17 00:00:00 2001 From: Sebastian Falbesoner Date: Mon, 21 Nov 2022 11:57:21 +0100 Subject: [PATCH 1156/1477] mmc: sdhci-esdhc-imx: correct CQHCI exit halt state check commit a3cab1d2132474969871b5d7f915c5c0167b48b0 upstream. With the current logic the "failed to exit halt state" error would be shown even if any other bit than CQHCI_HALT was set in the CQHCI_CTL register, since the right hand side is always true. Fix this by using the correct operator (bit-wise instead of logical AND) to only check for the halt bit flag, which was obviously intended here. 
Fixes: 85236d2be844 ("mmc: sdhci-esdhc-imx: clear the HALT bit when enable CQE") Signed-off-by: Sebastian Falbesoner Acked-by: Haibo Chen Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221121105721.1903878-1-sebastian.falbesoner@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 1f1bdd34dd55..9e827bfe19ff 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1449,7 +1449,7 @@ static void esdhc_cqe_enable(struct mmc_host *mmc) * system resume back. */ cqhci_writel(cq_host, 0, CQHCI_CTL); - if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT) + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) dev_err(mmc_dev(host->mmc), "failed to exit halt state when enable CQE\n"); From ed1966245307bad007b74068759d5af5aadf6b3f Mon Sep 17 00:00:00 2001 From: Wenchao Chen Date: Wed, 30 Nov 2022 20:13:28 +0800 Subject: [PATCH 1157/1477] mmc: sdhci-sprd: Fix no reset data and command after voltage switch commit dd30dcfa7a74a06f8dcdab260d8d5adf32f17333 upstream. After switching the voltage, no reset data and command will cause CMD2 timeout. 
Fixes: 29ca763fc26f ("mmc: sdhci-sprd: Add pin control support for voltage switch") Signed-off-by: Wenchao Chen Acked-by: Adrian Hunter Reviewed-by: Baolin Wang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221130121328.25553-1-wenchao.chen@unisoc.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-sprd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 8575f4537e57..110ee0c804c8 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -457,7 +457,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) } if (IS_ERR(sprd_host->pinctrl)) - return 0; + goto reset; switch (ios->signal_voltage) { case MMC_SIGNAL_VOLTAGE_180: @@ -485,6 +485,8 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) /* Wait for 300 ~ 500 us for pin state stable */ usleep_range(300, 500); + +reset: sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); return 0; From 940b774069f164b0c6f25b13946f1ef166e8d439 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 28 Nov 2022 15:32:56 +0200 Subject: [PATCH 1158/1477] mmc: sdhci: Fix voltage switch delay commit c981cdfb9925f64a364f13c2b4f98f877308a408 upstream. Commit 20b92a30b561 ("mmc: sdhci: update signal voltage switch code") removed voltage switch delays from sdhci because mmc core had been enhanced to support them. However that assumed that sdhci_set_ios() did a single clock change, which it did not, and so the delays in mmc core, which should have come after the first clock change, were not effective. Fix by avoiding re-configuring UHS and preset settings when the clock is turning on and the settings have not changed. That then also avoids the associated clock changes, so that then sdhci_set_ios() does a single clock change when voltage switching, and the mmc core delays become effective. 
To do that has meant keeping track of driver strength (host->drv_type), and cases of reinitialization (host->reinit_uhs). Note also, the 'turning_on_clk' restriction should not be necessary but is done to minimize the impact of the change on stable kernels. Fixes: 20b92a30b561 ("mmc: sdhci: update signal voltage switch code") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20221128133259.38305-2-adrian.hunter@intel.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 61 +++++++++++++++++++++++++++++++++++----- drivers/mmc/host/sdhci.h | 2 ++ 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index d42e86cdff12..133f0d376480 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -339,6 +339,7 @@ static void sdhci_init(struct sdhci_host *host, int soft) if (soft) { /* force clock reconfiguration */ host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } } @@ -2258,11 +2259,46 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing) } EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling); +static bool sdhci_timing_has_preset(unsigned char timing) +{ + switch (timing) { + case MMC_TIMING_UHS_SDR12: + case MMC_TIMING_UHS_SDR25: + case MMC_TIMING_UHS_SDR50: + case MMC_TIMING_UHS_SDR104: + case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: + return true; + }; + return false; +} + +static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing) +{ + return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && + sdhci_timing_has_preset(timing); +} + +static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios) +{ + /* + * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK + * Frequency. Check if preset values need to be enabled, or the Driver + * Strength needs updating. Note, clock changes are handled separately. 
+ */ + return !host->preset_enabled && + (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type); +} + void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct sdhci_host *host = mmc_priv(mmc); + bool reinit_uhs = host->reinit_uhs; + bool turning_on_clk = false; u8 ctrl; + host->reinit_uhs = false; + if (ios->power_mode == MMC_POWER_UNDEFINED) return; @@ -2288,6 +2324,8 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) sdhci_enable_preset_value(host, false); if (!ios->clock || ios->clock != host->clock) { + turning_on_clk = ios->clock && !host->clock; + host->ops->set_clock(host, ios->clock); host->clock = ios->clock; @@ -2314,6 +2352,17 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_bus_width(host, ios->bus_width); + /* + * Special case to avoid multiple clock changes during voltage + * switching. + */ + if (!reinit_uhs && + turning_on_clk && + host->timing == ios->timing && + host->version >= SDHCI_SPEC_300 && + !sdhci_presetable_values_change(host, ios)) + return; + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) { @@ -2357,6 +2406,7 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + host->drv_type = ios->drv_type; } else { /* * According to SDHC Spec v3.00, if the Preset Value @@ -2384,19 +2434,14 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_uhs_signaling(host, ios->timing); host->timing = ios->timing; - if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && - ((ios->timing == MMC_TIMING_UHS_SDR12) || - (ios->timing == MMC_TIMING_UHS_SDR25) || - (ios->timing == MMC_TIMING_UHS_SDR50) || - (ios->timing == MMC_TIMING_UHS_SDR104) || - (ios->timing == MMC_TIMING_UHS_DDR50) || - (ios->timing == MMC_TIMING_MMC_DDR52))) { + if (sdhci_preset_needed(host, ios->timing)) { u16 preset; sdhci_enable_preset_value(host, true); preset = 
sdhci_get_preset_value(host); ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK, preset); + host->drv_type = ios->drv_type; } /* Re-enable SD Clock */ @@ -3707,6 +3752,7 @@ int sdhci_resume_host(struct sdhci_host *host) sdhci_init(host, 0); host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } else { sdhci_init(host, (host->mmc->pm_flags & MMC_PM_KEEP_POWER)); @@ -3769,6 +3815,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset) /* Force clock and power re-program */ host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios); mmc->ops->set_ios(mmc, &mmc->ios); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 8b1650f37fbb..4db57c3a8cd4 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -520,6 +520,8 @@ struct sdhci_host { unsigned int clock; /* Current clock (MHz) */ u8 pwr; /* Current voltage */ + u8 drv_type; /* Current UHS-I driver type */ + bool reinit_uhs; /* Force UHS-related re-initialization */ bool runtime_suspended; /* Host is runtime suspended */ bool bus_on; /* Bus power prevents runtime suspend */ From 8649c023c427c1c934043e3083e575a7e69d53bb Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 25 Nov 2022 12:07:49 +0000 Subject: [PATCH 1159/1477] drm/amdgpu: temporarily disable broken Clang builds due to blown stack-frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6f6cb1714365a07dbc66851879538df9f6969288 upstream. Patch series "Fix a bunch of allmodconfig errors", v2. Since b339ec9c229aa ("kbuild: Only default to -Werror if COMPILE_TEST") WERROR now defaults to COMPILE_TEST meaning that it's enabled for allmodconfig builds. This leads to some interesting build failures when using Clang, each resolved in this set. With this set applied, I am able to obtain a successful allmodconfig Arm build. 
This patch (of 2): calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) architectures built with Clang (all released versions), whereby the stack frame gets blown up to well over 5k. This would cause an immediate kernel panic on most architectures. We'll revert this when the following bug report has been resolved: https://github.com/llvm/llvm-project/issues/41896. Link: https://lkml.kernel.org/r/20221125120750.3537134-1-lee@kernel.org Link: https://lkml.kernel.org/r/20221125120750.3537134-2-lee@kernel.org Signed-off-by: Lee Jones Suggested-by: Arnd Bergmann Acked-by: Arnd Bergmann Cc: Alex Deucher Cc: "Christian König" Cc: Daniel Vetter Cc: David Airlie Cc: Harry Wentland Cc: Lee Jones Cc: Leo Li Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: "Pan, Xinhui" Cc: Rodrigo Siqueira Cc: Thomas Zimmermann Cc: Tom Rix Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index f3274eb6b341..6c4cba09d23b 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -5,6 +5,7 @@ menu "Display Engine Configuration" config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y + depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help @@ -12,6 +13,12 @@ config DRM_AMD_DC support for AMDGPU. This adds required support for Vega and Raven ASICs. + calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) + architectures built with Clang (all released versions), whereby the stack + frame gets blown up to well over 5k. This would cause an immediate kernel + panic on most architectures. 
We'll revert this when the following bug report + has been resolved: https://github.com/llvm/llvm-project/issues/41896. + config DRM_AMD_DC_DCN def_bool n help From 648b92e5760721fbf230e242950182d7e9222143 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 21 Nov 2022 15:56:55 +0100 Subject: [PATCH 1160/1477] drm/i915: Never return 0 if not all requests retired commit 12b8b046e4c9de40fa59b6f067d6826f4e688f68 upstream. Users of intel_gt_retire_requests_timeout() expect 0 return value on success. However, we have no protection from passing back 0 potentially returned by a call to dma_fence_wait_timeout() when it succeeds right after its timeout has expired. Replace 0 with -ETIME before potentially using the timeout value as return code, so -ETIME is returned if there are still some requests not retired after timeout, 0 otherwise. v3: Use conditional expression, more compact but also better reflecting intention standing behind the change. v2: Move the added lines down so flush_submission() is not affected. Fixes: f33a8a51602c ("drm/i915: Merge wait_for_timelines with retire_request") Signed-off-by: Janusz Krzysztofik Reviewed-by: Andrzej Hajda Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221121145655.75141-3-janusz.krzysztofik@linux.intel.com (cherry picked from commit f301a29f143760ce8d3d6b6a8436d45d3448cde6) Signed-off-by: Tvrtko Ursulin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 66fcbf9d0fdd..cca285185dc4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -200,7 +200,7 @@ out_active: spin_lock(&timelines->lock); if (flush_submission(gt, timeout)) /* Wait, there's more! */ active_count++; - return active_count ? 
timeout : 0; + return active_count ? timeout ?: -ETIME : 0; } int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) From be111ebd8868d4b7c041cb3c6102e1ae27d6dc1d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 23 Nov 2022 17:14:34 -0500 Subject: [PATCH 1161/1477] tracing: Free buffers when a used dynamic event is removed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4313e5a613049dfc1819a6dfb5f94cf2caff9452 upstream. After 65536 dynamic events have been added and removed, the "type" field of the event then uses the first type number that is available (not currently used by other events). A type number is the identifier of the binary blobs in the tracing ring buffer (known as events) to map them to logic that can parse the binary blob. The issue is that if a dynamic event (like a kprobe event) is traced and is in the ring buffer, and then that event is removed (because it is dynamic, which means it can be created and destroyed), if another dynamic event is created that has the same number that new event's logic on parsing the binary blob will be used. To show how this can be an issue, the following can crash the kernel: # cd /sys/kernel/tracing # for i in `seq 65536`; do echo 'p:kprobes/foo do_sys_openat2 $arg1:u32' > kprobe_events # done For every iteration of the above, the writing to the kprobe_events will remove the old event and create a new one (with the same format) and increase the type number to the next available one until the type number reaches over 65535 which is the max number for the 16 bit type. After it reaches that number, the logic to allocate a new number simply looks for the next available number. When a dynamic event is removed, that number is then available to be reused by the next dynamic event created. That is, once the above reaches the max number, the number assigned to the event in that loop will remain the same. 
Now that means deleting one dynamic event and created another will reuse the previous events type number. This is where bad things can happen. After the above loop finishes, the kprobes/foo event which reads the do_sys_openat2 function call's first parameter as an integer. # echo 1 > kprobes/foo/enable # cat /etc/passwd > /dev/null # cat trace cat-2211 [005] .... 2007.849603: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196 cat-2211 [005] .... 2007.849620: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196 cat-2211 [005] .... 2007.849838: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196 cat-2211 [005] .... 2007.849880: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196 # echo 0 > kprobes/foo/enable Now if we delete the kprobe and create a new one that reads a string: # echo 'p:kprobes/foo do_sys_openat2 +0($arg2):string' > kprobe_events And now we can the trace: # cat trace sendmail-1942 [002] ..... 530.136320: foo: (do_sys_openat2+0x0/0x240) arg1= cat-2046 [004] ..... 530.930817: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������" cat-2046 [004] ..... 530.930961: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������" cat-2046 [004] ..... 530.934278: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������" cat-2046 [004] ..... 530.934563: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������" bash-1515 [007] ..... 
534.299093: foo: (do_sys_openat2+0x0/0x240) arg1="kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk���������@��4Z����;Y�����U And dmesg has: ================================================================== BUG: KASAN: use-after-free in string+0xd4/0x1c0 Read of size 1 at addr ffff88805fdbbfa0 by task cat/2049 CPU: 0 PID: 2049 Comm: cat Not tainted 6.1.0-rc6-test+ #641 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 Call Trace: dump_stack_lvl+0x5b/0x77 print_report+0x17f/0x47b kasan_report+0xad/0x130 string+0xd4/0x1c0 vsnprintf+0x500/0x840 seq_buf_vprintf+0x62/0xc0 trace_seq_printf+0x10e/0x1e0 print_type_string+0x90/0xa0 print_kprobe_event+0x16b/0x290 print_trace_line+0x451/0x8e0 s_show+0x72/0x1f0 seq_read_iter+0x58e/0x750 seq_read+0x115/0x160 vfs_read+0x11d/0x460 ksys_read+0xa9/0x130 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fc2e972ade2 Code: c0 e9 b2 fe ff ff 50 48 8d 3d b2 3f 0a 00 e8 05 f0 01 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 RSP: 002b:00007ffc64e687c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007fc2e972ade2 RDX: 0000000000020000 RSI: 00007fc2e980d000 RDI: 0000000000000003 RBP: 00007fc2e980d000 R08: 00007fc2e980c010 R09: 0000000000000000 R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000020f00 R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 The buggy address belongs to the physical page: page:ffffea00017f6ec0 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x5fdbb flags: 0xfffffc0000000(node=0|zone=1|lastcpupid=0x1fffff) raw: 000fffffc0000000 0000000000000000 ffffea00017f6ec8 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped 
because: kasan: bad access detected Memory state around the buggy address: ffff88805fdbbe80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88805fdbbf00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff88805fdbbf80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff88805fdbc000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88805fdbc080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== This was found when Zheng Yejian sent a patch to convert the event type number assignment to use IDA, which gives the next available number, and this bug showed up in the fuzz testing by Yujie Liu and the kernel test robot. But after further analysis, I found that this behavior is the same as when the event type numbers go past the 16bit max (and the above shows that). As modules have a similar issue, but is dealt with by setting a "WAS_ENABLED" flag when a module event is enabled, and when the module is freed, if any of its events were enabled, the ring buffer that holds that event is also cleared, to prevent reading stale events. The same can be done for dynamic events. If any dynamic event that is being removed was enabled, then make sure the buffers they were enabled in are now cleared. 
Link: https://lkml.kernel.org/r/20221123171434.545706e3@gandalf.local.home Link: https://lore.kernel.org/all/20221110020319.1259291-1-zhengyejian1@huawei.com/ Cc: stable@vger.kernel.org Cc: Andrew Morton Depends-on: e18eb8783ec49 ("tracing: Add tracing_reset_all_online_cpus_unlocked() function") Depends-on: 5448d44c38557 ("tracing: Add unified dynamic event framework") Depends-on: 6212dd29683ee ("tracing/kprobes: Use dyn_event framework for kprobe events") Depends-on: 065e63f951432 ("tracing: Only have rmmod clear buffers that its events were active in") Depends-on: 575380da8b469 ("tracing: Only clear trace buffer on module unload if event was traced") Fixes: 77b44d1b7c283 ("tracing/kprobes: Rename Kprobe-tracer to kprobe-event") Reported-by: Zheng Yejian Reported-by: Yujie Liu Reported-by: kernel test robot Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_dynevent.c | 2 ++ kernel/trace/trace_events.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 5fa49cfd2bb6..d312a52a10a5 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -70,6 +70,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) if (ret) break; } + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); return ret; @@ -165,6 +166,7 @@ int dyn_events_release_all(struct dyn_event_operations *type) break; } out: + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); return ret; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 826ecf01e380..bac13f24a96e 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2572,7 +2572,10 @@ static int probe_remove_event_call(struct trace_event_call *call) * TRACE_REG_UNREGISTER. 
*/ if (file->flags & EVENT_FILE_FL_ENABLED) - return -EBUSY; + goto busy; + + if (file->flags & EVENT_FILE_FL_WAS_ENABLED) + tr->clear_trace = true; /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this @@ -2585,6 +2588,12 @@ static int probe_remove_event_call(struct trace_event_call *call) __trace_remove_event_call(call); return 0; + busy: + /* No need to clear the trace now */ + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + tr->clear_trace = false; + } + return -EBUSY; } /* Remove an event_call */ From a2efc465245e535fefcad8c4ed5967254344257d Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Fri, 19 Feb 2021 17:19:36 +0800 Subject: [PATCH 1162/1477] io_uring: don't hold uring_lock when calling io_run_task_work* commit 8bad28d8a305b0e5ae444c8c3051e8744f5a4296 upstream. Abaci reported the below issue: [ 141.400455] hrtimer: interrupt took 205853 ns [ 189.869316] process 'usr/local/ilogtail/ilogtail_0.16.26' started with executable stack [ 250.188042] [ 250.188327] ============================================ [ 250.189015] WARNING: possible recursive locking detected [ 250.189732] 5.11.0-rc4 #1 Not tainted [ 250.190267] -------------------------------------------- [ 250.190917] a.out/7363 is trying to acquire lock: [ 250.191506] ffff888114dbcbe8 (&ctx->uring_lock){+.+.}-{3:3}, at: __io_req_task_submit+0x29/0xa0 [ 250.192599] [ 250.192599] but task is already holding lock: [ 250.193309] ffff888114dbfbe8 (&ctx->uring_lock){+.+.}-{3:3}, at: __x64_sys_io_uring_register+0xad/0x210 [ 250.194426] [ 250.194426] other info that might help us debug this: [ 250.195238] Possible unsafe locking scenario: [ 250.195238] [ 250.196019] CPU0 [ 250.196411] ---- [ 250.196803] lock(&ctx->uring_lock); [ 250.197420] lock(&ctx->uring_lock); [ 250.197966] [ 250.197966] *** DEADLOCK *** [ 250.197966] [ 250.198837] May be due to missing lock nesting notation [ 250.198837] [ 250.199780] 1 lock held by a.out/7363: [ 250.200373] #0: ffff888114dbfbe8 
(&ctx->uring_lock){+.+.}-{3:3}, at: __x64_sys_io_uring_register+0xad/0x210 [ 250.201645] [ 250.201645] stack backtrace: [ 250.202298] CPU: 0 PID: 7363 Comm: a.out Not tainted 5.11.0-rc4 #1 [ 250.203144] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 250.203887] Call Trace: [ 250.204302] dump_stack+0xac/0xe3 [ 250.204804] __lock_acquire+0xab6/0x13a0 [ 250.205392] lock_acquire+0x2c3/0x390 [ 250.205928] ? __io_req_task_submit+0x29/0xa0 [ 250.206541] __mutex_lock+0xae/0x9f0 [ 250.207071] ? __io_req_task_submit+0x29/0xa0 [ 250.207745] ? 0xffffffffa0006083 [ 250.208248] ? __io_req_task_submit+0x29/0xa0 [ 250.208845] ? __io_req_task_submit+0x29/0xa0 [ 250.209452] ? __io_req_task_submit+0x5/0xa0 [ 250.210083] __io_req_task_submit+0x29/0xa0 [ 250.210687] io_async_task_func+0x23d/0x4c0 [ 250.211278] task_work_run+0x89/0xd0 [ 250.211884] io_run_task_work_sig+0x50/0xc0 [ 250.212464] io_sqe_files_unregister+0xb2/0x1f0 [ 250.213109] __io_uring_register+0x115a/0x1750 [ 250.213718] ? __x64_sys_io_uring_register+0xad/0x210 [ 250.214395] ? __fget_files+0x15a/0x260 [ 250.214956] __x64_sys_io_uring_register+0xbe/0x210 [ 250.215620] ? 
trace_hardirqs_on+0x46/0x110 [ 250.216205] do_syscall_64+0x2d/0x40 [ 250.216731] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 250.217455] RIP: 0033:0x7f0fa17e5239 [ 250.218034] Code: 01 00 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 3d 01 f0 ff ff 73 01 c3 48 8b 0d 27 ec 2c 00 f7 d8 64 89 01 48 [ 250.220343] RSP: 002b:00007f0fa1eeac48 EFLAGS: 00000246 ORIG_RAX: 00000000000001ab [ 250.221360] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f0fa17e5239 [ 250.222272] RDX: 0000000000000000 RSI: 0000000000000003 RDI: 0000000000000008 [ 250.223185] RBP: 00007f0fa1eeae20 R08: 0000000000000000 R09: 0000000000000000 [ 250.224091] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 250.224999] R13: 0000000000021000 R14: 0000000000000000 R15: 00007f0fa1eeb700 This is caused by calling io_run_task_work_sig() to do work under uring_lock while the caller io_sqe_files_unregister() already held uring_lock. To fix this issue, briefly drop uring_lock when calling io_run_task_work_sig(), and there are two things to concern: - hold uring_lock in io_ring_ctx_free() around io_sqe_files_unregister() this is for consistency of lock/unlock. - add new fixed rsrc ref node before dropping uring_lock it's not safe to do io_uring_enter-->percpu_ref_get() with a dying one. 
- check if rsrc_data->refs is dying to avoid parallel io_sqe_files_unregister Reported-by: Abaci Fixes: 1ffc54220c44 ("io_uring: fix io_sqe_files_unregister() hangs") Suggested-by: Pavel Begunkov Signed-off-by: Hao Xu [axboe: fixes from Pavel folded in] Signed-off-by: Jens Axboe Signed-off-by: Samiullah Khawaja Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 82 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d1cb1addea96..c5c22b067cd8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -217,6 +217,7 @@ struct fixed_file_data { struct completion done; struct list_head ref_list; spinlock_t lock; + bool quiesce; }; struct io_buffer { @@ -7105,41 +7106,79 @@ static void io_sqe_files_set_node(struct fixed_file_data *file_data, percpu_ref_get(&file_data->refs); } -static int io_sqe_files_unregister(struct io_ring_ctx *ctx) -{ - struct fixed_file_data *data = ctx->file_data; - struct fixed_file_ref_node *backup_node, *ref_node = NULL; - unsigned nr_tables, i; - int ret; - if (!data) - return -ENXIO; - backup_node = alloc_fixed_file_ref_node(ctx); - if (!backup_node) - return -ENOMEM; +static void io_sqe_files_kill_node(struct fixed_file_data *data) +{ + struct fixed_file_ref_node *ref_node = NULL; spin_lock_bh(&data->lock); ref_node = data->node; spin_unlock_bh(&data->lock); if (ref_node) percpu_ref_kill(&ref_node->refs); +} - percpu_ref_kill(&data->refs); +static int io_file_ref_quiesce(struct fixed_file_data *data, + struct io_ring_ctx *ctx) +{ + int ret; + struct fixed_file_ref_node *backup_node; - /* wait for all refs nodes to complete */ - flush_delayed_work(&ctx->file_put_work); + if (data->quiesce) + return -ENXIO; + + data->quiesce = true; do { + backup_node = alloc_fixed_file_ref_node(ctx); + if (!backup_node) + break; + + io_sqe_files_kill_node(data); + percpu_ref_kill(&data->refs); + flush_delayed_work(&ctx->file_put_work); + ret = 
wait_for_completion_interruptible(&data->done); if (!ret) break; + + percpu_ref_resurrect(&data->refs); + io_sqe_files_set_node(data, backup_node); + backup_node = NULL; + reinit_completion(&data->done); + mutex_unlock(&ctx->uring_lock); ret = io_run_task_work_sig(); - if (ret < 0) { - percpu_ref_resurrect(&data->refs); - reinit_completion(&data->done); - io_sqe_files_set_node(data, backup_node); - return ret; - } + mutex_lock(&ctx->uring_lock); + + if (ret < 0) + break; + backup_node = alloc_fixed_file_ref_node(ctx); + ret = -ENOMEM; + if (!backup_node) + break; } while (1); + data->quiesce = false; + + if (backup_node) + destroy_fixed_file_ref_node(backup_node); + return ret; +} + +static int io_sqe_files_unregister(struct io_ring_ctx *ctx) +{ + struct fixed_file_data *data = ctx->file_data; + unsigned nr_tables, i; + int ret; + + /* + * percpu_ref_is_dying() is to stop parallel files unregister + * Since we possibly drop uring lock later in this function to + * run task work. + */ + if (!data || percpu_ref_is_dying(&data->refs)) + return -ENXIO; + ret = io_file_ref_quiesce(data, ctx); + if (ret) + return ret; __io_sqe_files_unregister(ctx); nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE); @@ -7150,7 +7189,6 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) kfree(data); ctx->file_data = NULL; ctx->nr_user_files = 0; - destroy_fixed_file_ref_node(backup_node); return 0; } @@ -8444,7 +8482,9 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) css_put(ctx->sqo_blkcg_css); #endif + mutex_lock(&ctx->uring_lock); io_sqe_files_unregister(ctx); + mutex_unlock(&ctx->uring_lock); io_eventfd_unregister(ctx); io_destroy_buffers(ctx); From b50c9641897274c3faef5f95ac852f54b94be2e8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 May 2022 14:41:36 +0100 Subject: [PATCH 1163/1477] ASoC: ops: Fix bounds check for _sx controls [ Upstream commit 698813ba8c580efb356ace8dbf55f61dac6063a8 ] For _sx controls the semantics of the max field 
is not the usual one, max is the number of steps rather than the maximum value. This means that our check in snd_soc_put_volsw_sx() needs to just check against the maximum value. Fixes: 4f1e50d6a9cf9c1b ("ASoC: ops: Reject out of bounds values in snd_soc_put_volsw_sx()") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220511134137.169575-1-broonie@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 0f26d6c31ce5..5fdd96e77ef3 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -432,7 +432,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, val = ucontrol->value.integer.value[0]; if (mc->platform_max && val > mc->platform_max) return -EINVAL; - if (val > max - min) + if (val > max) return -EINVAL; if (val < 0) return -EINVAL; From 302edce1dd426f7e2abfed35563dec8d5aed0667 Mon Sep 17 00:00:00 2001 From: Maxim Korotkov Date: Thu, 17 Nov 2022 15:30:34 +0300 Subject: [PATCH 1164/1477] pinctrl: single: Fix potential division by zero [ Upstream commit 64c150339e7f6c5cbbe8c17a56ef2b3902612798 ] There is a possibility of dividing by zero due to the pcs->bits_per_pin if pcs->fmask() also has a value of zero and called fls from asm-generic/bitops/builtin-fls.h or arch/x86/include/asm/bitops.h. The function pcs_probe() has the branch that assigned to fmask 0 before pcs_allocate_pin_table() was called Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Fixes: 4e7e8017a80e ("pinctrl: pinctrl-single: enhance to configure multiple pins of different modules") Signed-off-by: Maxim Korotkov Reviewed-by: Tony Lindgren Link: https://lore.kernel.org/r/20221117123034.27383-1-korotkov.maxim.s@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-single.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 17aa0d542d92..d139cd9e6d13 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -726,7 +726,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs) mux_bytes = pcs->width / BITS_PER_BYTE; - if (pcs->bits_per_mux) { + if (pcs->bits_per_mux && pcs->fmask) { pcs->bits_per_pin = fls(pcs->fmask); nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin; num_pins_in_register = pcs->width / pcs->bits_per_pin; From 10ed7655a17f6a3eaecd1293830488259ccd5723 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Thu, 1 Dec 2022 12:01:26 +0800 Subject: [PATCH 1165/1477] iommu/vt-d: Fix PCI device refcount leak in has_external_pci() [ Upstream commit afca9e19cc720bfafc75dc5ce429c185ca93f31d ] for_each_pci_dev() is implemented by pci_get_device(). The comment of pci_get_device() says that it will increase the reference count for the returned pci_dev and also decrease the reference count for the input pci_dev @from if it is not NULL. If we break for_each_pci_dev() loop with pdev not NULL, we need to call pci_dev_put() to decrease the reference count. Add the missing pci_dev_put() before 'return true' to avoid reference count leak. 
Fixes: 89a6079df791 ("iommu/vt-d: Force IOMMU on for platform opt in hint") Signed-off-by: Xiongfeng Wang Link: https://lore.kernel.org/r/20221121113649.190393-2-wangxiongfeng2@huawei.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f23329b7f97c..47666c9b4ba1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4893,8 +4893,10 @@ static inline bool has_external_pci(void) struct pci_dev *pdev = NULL; for_each_pci_dev(pdev) - if (pdev->external_facing) + if (pdev->external_facing) { + pci_dev_put(pdev); return true; + } return false; } From a5c65cd56aed027f8a97fda8b691caaeb66d115e Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Thu, 1 Dec 2022 12:01:27 +0800 Subject: [PATCH 1166/1477] iommu/vt-d: Fix PCI device refcount leak in dmar_dev_scope_init() [ Upstream commit 4bedbbd782ebbe7287231fea862c158d4f08a9e3 ] for_each_pci_dev() is implemented by pci_get_device(). The comment of pci_get_device() says that it will increase the reference count for the returned pci_dev and also decrease the reference count for the input pci_dev @from if it is not NULL. If we break for_each_pci_dev() loop with pdev not NULL, we need to call pci_dev_put() to decrease the reference count. Add the missing pci_dev_put() for the error path to avoid reference count leak. 
Fixes: 2e4552893038 ("iommu/vt-d: Unify the way to process DMAR device scope array") Signed-off-by: Xiongfeng Wang Link: https://lore.kernel.org/r/20221121113649.190393-3-wangxiongfeng2@huawei.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/dmar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 0bc497f4cb9f..a27765a7f6b7 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -816,6 +816,7 @@ int __init dmar_dev_scope_init(void) info = dmar_alloc_pci_notify_info(dev, BUS_NOTIFY_ADD_DEVICE); if (!info) { + pci_dev_put(dev); return dmar_dev_scope_status; } else { dmar_pci_bus_add_dev(info); From a1877001ed6d7be80aa6dbd2cb6698e74473205e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 7 Jul 2021 15:38:08 +0200 Subject: [PATCH 1167/1477] parisc: Increase size of gcc stack frame check [ Upstream commit 55b70eed81cba1331773d4aaf5cba2bb07475cd8 ] parisc uses much bigger frames than other architectures, so increase the stack frame check value to avoid compiler warnings. 
Cc: Arnd Bergmann Cc: Abd-Alrhman Masalkhi Cc: Christoph Hellwig Signed-off-by: Helge Deller Stable-dep-of: 152fe65f300e ("Kconfig.debug: provide a little extra FRAME_WARN leeway when KASAN is enabled") Signed-off-by: Sasha Levin --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6b0649d8bca7..8cf3b87edd63 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -298,7 +298,7 @@ config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 default 2048 if GCC_PLUGIN_LATENT_ENTROPY - default 1280 if (!64BIT && PARISC) + default 1536 if (!64BIT && PARISC) default 1024 if (!64BIT && !PARISC) default 2048 if 64BIT help From fcf20da0997458bb3cb6a23d78577b3fa6d9a5db Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 24 Sep 2021 15:43:29 -0700 Subject: [PATCH 1168/1477] xtensa: increase size of gcc stack frame check [ Upstream commit 867050247e295cf20fce046a92a7e6491fcfe066 ] xtensa frame size is larger than the frame size for almost all other architectures. This results in more than 50 "the frame size of is larger than 1024 bytes" errors when trying to build xtensa:allmodconfig. Increase frame size for xtensa to 1536 bytes to avoid compile errors due to frame size limits. 
Link: https://lkml.kernel.org/r/20210912025235.3514761-1-linux@roeck-us.net Signed-off-by: Guenter Roeck Reviewed-by: Max Filippov Cc: Chris Zankel Cc: David Laight Cc: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Stable-dep-of: 152fe65f300e ("Kconfig.debug: provide a little extra FRAME_WARN leeway when KASAN is enabled") Signed-off-by: Sasha Levin --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8cf3b87edd63..f6c50f8ee9a1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -298,7 +298,7 @@ config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 default 2048 if GCC_PLUGIN_LATENT_ENTROPY - default 1536 if (!64BIT && PARISC) + default 1536 if (!64BIT && (PARISC || XTENSA)) default 1024 if (!64BIT && !PARISC) default 2048 if 64BIT help From 19d91d3798e7f8725ee9e3a48b290c03964c09ae Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 19 Nov 2021 22:31:03 +0100 Subject: [PATCH 1169/1477] parisc: Increase FRAME_WARN to 2048 bytes on parisc [ Upstream commit 8d192bec534bd5b778135769a12e5f04580771f7 ] PA-RISC uses a much bigger frame size for functions than other architectures. So increase it to 2048 for 32- and 64-bit kernels. This fixes e.g. a warning in lib/xxhash.c. 
Reported-by: kernel test robot Signed-off-by: Helge Deller Stable-dep-of: 152fe65f300e ("Kconfig.debug: provide a little extra FRAME_WARN leeway when KASAN is enabled") Signed-off-by: Sasha Levin --- lib/Kconfig.debug | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index f6c50f8ee9a1..54502848e9dc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -298,8 +298,9 @@ config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 default 2048 if GCC_PLUGIN_LATENT_ENTROPY - default 1536 if (!64BIT && (PARISC || XTENSA)) - default 1024 if (!64BIT && !PARISC) + default 2048 if PARISC + default 1536 if (!64BIT && XTENSA) + default 1024 if !64BIT default 2048 if 64BIT help Tell gcc to warn at build time for stack frames larger than this. From f09ac62f0e3f1b4a490d9922a3ac95f554297828 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 25 Nov 2022 12:07:50 +0000 Subject: [PATCH 1170/1477] Kconfig.debug: provide a little extra FRAME_WARN leeway when KASAN is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 152fe65f300e1819d59b80477d3e0999b4d5d7d2 ] When enabled, KASAN enlarges function's stack-frames. Pushing quite a few over the current threshold. This can mainly be seen on 32-bit architectures where the present limit (when !GCC) is a lowly 1024-Bytes. 
Link: https://lkml.kernel.org/r/20221125120750.3537134-3-lee@kernel.org Signed-off-by: Lee Jones Acked-by: Arnd Bergmann Cc: Alex Deucher Cc: "Christian König" Cc: Daniel Vetter Cc: David Airlie Cc: Harry Wentland Cc: Leo Li Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: "Pan, Xinhui" Cc: Rodrigo Siqueira Cc: Thomas Zimmermann Cc: Tom Rix Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 54502848e9dc..4aed8abb2022 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -300,6 +300,7 @@ config FRAME_WARN default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 2048 if PARISC default 1536 if (!64BIT && XTENSA) + default 1280 if KASAN && !64BIT default 1024 if !64BIT default 2048 if 64BIT help From 7ca14c5f24dbc74253a7e14988d7ab63f0bb71a7 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 1 Apr 2022 10:33:43 +0300 Subject: [PATCH 1171/1477] selftests: net: add delete nexthop route warning test [ Upstream commit 392baa339c6a42a2cb088e5e5df2b59b8f89be24 ] Add a test which causes a WARNING on kernels which treat a nexthop route like a normal route when comparing for deletion and a device is specified. That is, a route is found but we hit a warning while matching it. The warning is from fib_info_nh() in include/net/nexthop.h because we run it on a fib_info with nexthop object. The call chain is: inet_rtm_delroute -> fib_table_delete -> fib_nh_match (called with a nexthop fib_info and also with fc_oif set thus calling fib_info_nh on the fib_info and triggering the warning). Repro steps: $ ip nexthop add id 12 via 172.16.1.3 dev veth1 $ ip route add 172.16.101.1/32 nhid 12 $ ip route delete 172.16.101.1/32 dev veth1 Signed-off-by: Nikolay Aleksandrov Reviewed-by: David Ahern Signed-off-by: David S. 
Miller Stable-dep-of: d5082d386eee ("ipv4: Fix route deletion when nexthop info is not specified") Signed-off-by: Sasha Levin --- tools/testing/selftests/net/fib_nexthops.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 4c7d33618437..8cca92c969b6 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -931,6 +931,20 @@ ipv4_fcnal() set +e check_nexthop "dev veth1" "" log_test $? 0 "Nexthops removed on admin down" + + # nexthop route delete warning: route add with nhid and delete + # using device + run_cmd "$IP li set dev veth1 up" + run_cmd "$IP nexthop add id 12 via 172.16.1.3 dev veth1" + out1=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l` + run_cmd "$IP route add 172.16.101.1/32 nhid 12" + run_cmd "$IP route delete 172.16.101.1/32 dev veth1" + out2=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l` + [ $out1 -eq $out2 ] + rc=$? + log_test $rc 0 "Delete nexthop route warning" + run_cmd "$IP ip route delete 172.16.101.1/32 nhid 12" + run_cmd "$IP ip nexthop del id 12" } ipv4_grp_fcnal() From 4919503426c93c5a254148db08796a695245bf94 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 1 Apr 2022 18:54:27 +0300 Subject: [PATCH 1172/1477] selftests: net: fix nexthop warning cleanup double ip typo [ Upstream commit 692930cc435099580a4b9e32fa781b0688c18439 ] I made a stupid typo when adding the nexthop route warning selftest and added both $IP and ip after it (double ip) on the cleanup path. The error doesn't show up when running the test, but obviously it doesn't cleanup properly after it. Fixes: 392baa339c6a ("selftests: net: add delete nexthop route warning test") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller Stable-dep-of: d5082d386eee ("ipv4: Fix route deletion when nexthop info is not specified") Signed-off-by: Sasha Levin --- tools/testing/selftests/net/fib_nexthops.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 8cca92c969b6..56d90335605d 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -943,8 +943,8 @@ ipv4_fcnal() [ $out1 -eq $out2 ] rc=$? log_test $rc 0 "Delete nexthop route warning" - run_cmd "$IP ip route delete 172.16.101.1/32 nhid 12" - run_cmd "$IP ip nexthop del id 12" + run_cmd "$IP route delete 172.16.101.1/32 nhid 12" + run_cmd "$IP nexthop del id 12" } ipv4_grp_fcnal() From 0b5394229ebae09afc07aabccb5ffd705ffd250e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 6 Oct 2022 10:48:49 -0600 Subject: [PATCH 1173/1477] ipv4: Handle attempt to delete multipath route when fib_info contains an nh reference [ Upstream commit 61b91eb33a69c3be11b259c5ea484505cd79f883 ] Gwangun Jung reported a slab-out-of-bounds access in fib_nh_match: fib_nh_match+0xf98/0x1130 linux-6.0-rc7/net/ipv4/fib_semantics.c:961 fib_table_delete+0x5f3/0xa40 linux-6.0-rc7/net/ipv4/fib_trie.c:1753 inet_rtm_delroute+0x2b3/0x380 linux-6.0-rc7/net/ipv4/fib_frontend.c:874 Separate nexthop objects are mutually exclusive with the legacy multipath spec. Fix fib_nh_match to return if the config for the to be deleted route contains a multipath spec while the fib_info is using a nexthop object. Fixes: 493ced1ac47c ("ipv4: Allow routes to use nexthop objects") Fixes: 6bf92d70e690 ("net: ipv4: fix route with nexthop object delete warning") Reported-by: Gwangun Jung Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. 
Miller Stable-dep-of: d5082d386eee ("ipv4: Fix route deletion when nexthop info is not specified") Signed-off-by: Sasha Levin --- net/ipv4/fib_semantics.c | 8 ++++---- tools/testing/selftests/net/fib_nexthops.sh | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3824b7abecf7..f62b1739f63b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -887,13 +887,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, return 1; } + /* cannot match on nexthop object attributes */ + if (fi->nh) + return 1; + if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh; - /* cannot match on nexthop object attributes */ - if (fi->nh) - return 1; - nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 56d90335605d..050c1e0f1b0f 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -945,6 +945,11 @@ ipv4_fcnal() log_test $rc 0 "Delete nexthop route warning" run_cmd "$IP route delete 172.16.101.1/32 nhid 12" run_cmd "$IP nexthop del id 12" + + run_cmd "$IP nexthop add id 21 via 172.16.1.6 dev veth1" + run_cmd "$IP ro add 172.16.101.0/24 nhid 21" + run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" + log_test $? 2 "Delete multipath route with only nh id based entry" } ipv4_grp_fcnal() From 481f9ed8ebdcca43064813c220c1c2835102b1a9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 24 Nov 2022 23:09:32 +0200 Subject: [PATCH 1174/1477] ipv4: Fix route deletion when nexthop info is not specified [ Upstream commit d5082d386eee7e8ec46fa8581932c81a4961dcef ] When the kernel receives a route deletion request from user space it tries to delete a route that matches the route attributes specified in the request. 
If only prefix information is specified in the request, the kernel should delete the first matching FIB alias regardless of its associated FIB info. However, an error is currently returned when the FIB info is backed by a nexthop object: # ip nexthop add id 1 via 192.0.2.2 dev dummy10 # ip route add 198.51.100.0/24 nhid 1 # ip route del 198.51.100.0/24 RTNETLINK answers: No such process Fix by matching on such a FIB info when legacy nexthop attributes are not specified in the request. An earlier check already covers the case where a nexthop ID is specified in the request. Add tests that cover these flows. Before the fix: # ./fib_nexthops.sh -t ipv4_fcnal ... TEST: Delete route when not specifying nexthop attributes [FAIL] Tests passed: 11 Tests failed: 1 After the fix: # ./fib_nexthops.sh -t ipv4_fcnal ... TEST: Delete route when not specifying nexthop attributes [ OK ] Tests passed: 12 Tests failed: 0 No regressions in other tests: # ./fib_nexthops.sh ... Tests passed: 228 Tests failed: 0 # ./fib_tests.sh ... 
Tests passed: 186 Tests failed: 0 Cc: stable@vger.kernel.org Reported-by: Jonas Gorski Tested-by: Jonas Gorski Fixes: 493ced1ac47c ("ipv4: Allow routes to use nexthop objects") Fixes: 6bf92d70e690 ("net: ipv4: fix route with nexthop object delete warning") Fixes: 61b91eb33a69 ("ipv4: Handle attempt to delete multipath route when fib_info contains an nh reference") Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20221124210932.2470010-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/fib_semantics.c | 8 +++++--- tools/testing/selftests/net/fib_nexthops.sh | 11 +++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f62b1739f63b..52ec0c43e6b8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -887,9 +887,11 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, return 1; } - /* cannot match on nexthop object attributes */ - if (fi->nh) - return 1; + if (fi->nh) { + if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp) + return 1; + return 0; + } if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh; diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 050c1e0f1b0f..7ece4131dc6f 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -950,6 +950,17 @@ ipv4_fcnal() run_cmd "$IP ro add 172.16.101.0/24 nhid 21" run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" log_test $? 2 "Delete multipath route with only nh id based entry" + + run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1" + run_cmd "$IP ro add 172.16.102.0/24 nhid 22" + run_cmd "$IP ro del 172.16.102.0/24 dev veth1" + log_test $? 
2 "Delete route when specifying only nexthop device" + + run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6" + log_test $? 2 "Delete route when specifying only gateway" + + run_cmd "$IP ro del 172.16.102.0/24" + log_test $? 0 "Delete route when not specifying nexthop attributes" } ipv4_grp_fcnal() From b7f7a0402eb7332e1f81241368766044f40475ac Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Sat, 8 Oct 2022 14:02:20 +0300 Subject: [PATCH 1175/1477] Revert "tty: n_gsm: avoid call of sleeping functions from atomic context" commit acdab4cb4ba7e5f94d2b422ebd7bf4bf68178fb2 upstream. This reverts commit 902e02ea9385373ce4b142576eef41c642703955. The above commit is reverted as the usage of tx_mutex seems not to solve the problem described in 902e02ea9385 ("tty: n_gsm: avoid call of sleeping functions from atomic context") and just moves the bug to another place. Signed-off-by: Fedor Pchelkin Signed-off-by: Alexey Khoroshilov Reviewed-by: Daniel Starke Link: https://lore.kernel.org/r/20221008110221.13645-2-pchelkin@ispras.ru Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index e85282825973..f5063499f9cf 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -235,7 +235,7 @@ struct gsm_mux { int old_c_iflag; /* termios c_iflag value before attach */ bool constipated; /* Asked by remote to shut up */ - struct mutex tx_mutex; + spinlock_t tx_lock; unsigned int tx_bytes; /* TX data outstanding */ #define TX_THRESH_HI 8192 #define TX_THRESH_LO 2048 @@ -820,14 +820,15 @@ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) * * Add data to the transmit queue and try and get stuff moving * out of the mux tty if not already doing so. Take the - * the gsm tx mutex and dlci lock. + * the gsm tx lock and dlci lock. 
*/ static void gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) { - mutex_lock(&dlci->gsm->tx_mutex); + unsigned long flags; + spin_lock_irqsave(&dlci->gsm->tx_lock, flags); __gsm_data_queue(dlci, msg); - mutex_unlock(&dlci->gsm->tx_mutex); + spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); } /** @@ -839,7 +840,7 @@ static void gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) * is data. Keep to the MRU of the mux. This path handles the usual tty * interface which is a byte stream with optional modem data. * - * Caller must hold the tx_mutex of the mux. + * Caller must hold the tx_lock of the mux. */ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) @@ -902,7 +903,7 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) * is data. Keep to the MRU of the mux. This path handles framed data * queued as skbuffs to the DLCI. * - * Caller must hold the tx_mutex of the mux. + * Caller must hold the tx_lock of the mux. */ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, @@ -918,7 +919,7 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, if (dlci->adaption == 4) overhead = 1; - /* dlci->skb is locked by tx_mutex */ + /* dlci->skb is locked by tx_lock */ if (dlci->skb == NULL) { dlci->skb = skb_dequeue_tail(&dlci->skb_list); if (dlci->skb == NULL) @@ -1018,12 +1019,13 @@ static void gsm_dlci_data_sweep(struct gsm_mux *gsm) static void gsm_dlci_data_kick(struct gsm_dlci *dlci) { + unsigned long flags; int sweep; if (dlci->constipated) return; - mutex_lock(&dlci->gsm->tx_mutex); + spin_lock_irqsave(&dlci->gsm->tx_lock, flags); /* If we have nothing running then we need to fire up */ sweep = (dlci->gsm->tx_bytes < TX_THRESH_LO); if (dlci->gsm->tx_bytes == 0) { @@ -1034,7 +1036,7 @@ static void gsm_dlci_data_kick(struct gsm_dlci *dlci) } if (sweep) gsm_dlci_data_sweep(dlci->gsm); - mutex_unlock(&dlci->gsm->tx_mutex); + spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); } /* @@ -1256,6 
+1258,7 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command, const u8 *data, int clen) { u8 buf[1]; + unsigned long flags; switch (command) { case CMD_CLD: { @@ -1277,9 +1280,9 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command, gsm->constipated = false; gsm_control_reply(gsm, CMD_FCON, NULL, 0); /* Kick the link in case it is idling */ - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); gsm_data_kick(gsm, NULL); - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); break; case CMD_FCOFF: /* Modem wants us to STFU */ @@ -2225,7 +2228,6 @@ static void gsm_free_mux(struct gsm_mux *gsm) break; } } - mutex_destroy(&gsm->tx_mutex); mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); @@ -2297,12 +2299,12 @@ static struct gsm_mux *gsm_alloc_mux(void) } spin_lock_init(&gsm->lock); mutex_init(&gsm->mutex); - mutex_init(&gsm->tx_mutex); kref_init(&gsm->ref); INIT_LIST_HEAD(&gsm->tx_list); timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); init_waitqueue_head(&gsm->event); spin_lock_init(&gsm->control_lock); + spin_lock_init(&gsm->tx_lock); gsm->t1 = T1; gsm->t2 = T2; @@ -2327,7 +2329,6 @@ static struct gsm_mux *gsm_alloc_mux(void) } spin_unlock(&gsm_mux_lock); if (i == MAX_MUX) { - mutex_destroy(&gsm->tx_mutex); mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); @@ -2652,15 +2653,16 @@ static int gsmld_open(struct tty_struct *tty) static void gsmld_write_wakeup(struct tty_struct *tty) { struct gsm_mux *gsm = tty->disc_data; + unsigned long flags; /* Queue poll */ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); gsm_data_kick(gsm, NULL); if (gsm->tx_bytes < TX_THRESH_LO) { gsm_dlci_data_sweep(gsm); } - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); } /** @@ -2703,6 +2705,7 @@ static ssize_t gsmld_write(struct tty_struct *tty, struct file *file, 
const unsigned char *buf, size_t nr) { struct gsm_mux *gsm = tty->disc_data; + unsigned long flags; int space; int ret; @@ -2710,13 +2713,13 @@ static ssize_t gsmld_write(struct tty_struct *tty, struct file *file, return -ENODEV; ret = -ENOBUFS; - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); space = tty_write_room(tty); if (space >= nr) ret = tty->ops->write(tty, buf, nr); else set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); return ret; } From 5e3d4a68e2e11dbe561fa7de919ff9c82547a215 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 1 Dec 2022 12:19:35 -0800 Subject: [PATCH 1176/1477] x86/tsx: Add a feature bit for TSX control MSR support commit aaa65d17eec372c6a9756833f3964ba05b05ea14 upstream. Support for the TSX control MSR is enumerated in MSR_IA32_ARCH_CAPABILITIES. This is different from how other CPU features are enumerated i.e. via CPUID. Currently, a call to tsx_ctrl_is_supported() is required for enumerating the feature. In the absence of a feature bit for TSX control, any code that relies on checking feature bits directly will not work. In preparation for adding a feature bit check in MSR save/restore during suspend/resume, set a new feature bit X86_FEATURE_TSX_CTRL when MSR_IA32_TSX_CTRL is present. 
[ bp: Remove tsx_ctrl_is_supported()] [Pawan: Resolved conflicts in backport; Removed parts of commit message referring to removed function tsx_ctrl_is_supported()] Suggested-by: Andrew Cooper Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Cc: Link: https://lore.kernel.org/r/de619764e1d98afbb7a5fa58424f1278ede37b45.1668539735.git.pawan.kumar.gupta@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/tsx.c | 33 +++++++++++++----------------- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f507ad7c7fd7..1fcda8263554 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -300,6 +300,7 @@ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_MSR_TSX_CTRL (11*32+18) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index e2ad30e474f8..da06bbb5d68e 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -58,24 +58,6 @@ void tsx_enable(void) wrmsrl(MSR_IA32_TSX_CTRL, tsx); } -static bool __init tsx_ctrl_is_supported(void) -{ - u64 ia32_cap = x86_read_arch_cap_msr(); - - /* - * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this - * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. - * - * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a - * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES - * bit MDS_NO=1. 
CPUs with MDS_NO=0 are not planned to get - * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, - * tsx= cmdline requests will do nothing on CPUs without - * MSR_IA32_TSX_CTRL support. - */ - return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); -} - static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) { if (boot_cpu_has_bug(X86_BUG_TAA)) @@ -89,9 +71,22 @@ void __init tsx_init(void) char arg[5] = {}; int ret; - if (!tsx_ctrl_is_supported()) + /* + * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this + * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. + * + * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a + * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES + * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get + * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, + * tsx= cmdline requests will do nothing on CPUs without + * MSR_IA32_TSX_CTRL support. + */ + if (!(x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR)) return; + setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL); + ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg)); if (ret >= 0) { if (!strcmp(arg, "on")) { From 7462cd2443bc30488d29428ff8e13f96f8262f14 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 1 Dec 2022 12:19:41 -0800 Subject: [PATCH 1177/1477] x86/pm: Add enumeration check before spec MSRs save/restore setup commit 50bcceb7724e471d9b591803889df45dcbb584bc upstream. pm_save_spec_msr() keeps a list of all the MSRs which _might_ need to be saved and restored at hibernate and resume. However, it has zero awareness of CPU support for these MSRs. It mostly works by unconditionally attempting to manipulate these MSRs and relying on rdmsrl_safe() being able to handle a #GP on CPUs where the support is unavailable. However, it's possible for reads (RDMSR) to be supported for a given MSR while writes (WRMSR) are not. 
In this case, msr_build_context() sees a successful read (RDMSR) and marks the MSR as valid. Then, later, a write (WRMSR) fails, producing a nasty (but harmless) error message. This causes restore_processor_state() to try and restore it, but writing this MSR is not allowed on the Intel Atom N2600 leading to: unchecked MSR access error: WRMSR to 0x122 (tried to write 0x0000000000000002) \ at rIP: 0xffffffff8b07a574 (native_write_msr+0x4/0x20) Call Trace: restore_processor_state x86_acpi_suspend_lowlevel acpi_suspend_enter suspend_devices_and_enter pm_suspend.cold state_store kernfs_fop_write_iter vfs_write ksys_write do_syscall_64 ? do_syscall_64 ? up_read ? lock_is_held_type ? asm_exc_page_fault ? lockdep_hardirqs_on entry_SYSCALL_64_after_hwframe To fix this, add the corresponding X86_FEATURE bit for each MSR. Avoid trying to manipulate the MSR when the feature bit is clear. This required adding a X86_FEATURE bit for MSRs that do not have one already, but it's a small price to pay. [ bp: Move struct msr_enumeration inside the only function that uses it. ] [Pawan: Resolve build issue in backport] Fixes: 73924ec4d560 ("x86/pm: Save the MSR validity status at context setup") Reported-by: Hans de Goede Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Acked-by: Rafael J. 
Wysocki Cc: Link: https://lore.kernel.org/r/c24db75d69df6e66c0465e13676ad3f2837a2ed8.1668539735.git.pawan.kumar.gupta@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/power/cpu.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 61581c45788e..4e4e76ecd3ec 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -516,16 +516,23 @@ static int pm_cpu_check(const struct x86_cpu_id *c) static void pm_save_spec_msr(void) { - u32 spec_msr_id[] = { - MSR_IA32_SPEC_CTRL, - MSR_IA32_TSX_CTRL, - MSR_TSX_FORCE_ABORT, - MSR_IA32_MCU_OPT_CTRL, - MSR_AMD64_LS_CFG, - MSR_AMD64_DE_CFG, + struct msr_enumeration { + u32 msr_no; + u32 feature; + } msr_enum[] = { + { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL }, + { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL }, + { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT }, + { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL }, + { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD }, + { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC }, }; + int i; - msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); + for (i = 0; i < ARRAY_SIZE(msr_enum); i++) { + if (boot_cpu_has(msr_enum[i].feature)) + msr_build_context(&msr_enum[i].msr_no, 1); + } } static int pm_check_save_msr(void) From df7613659872249d9bc5fbc557baec3cab4a76e6 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 9 Nov 2022 11:22:50 +0000 Subject: [PATCH 1178/1477] i2c: npcm7xx: Fix error handling in npcm_i2c_init() [ Upstream commit 145900cf91c4b32ac05dbc8675a0c7f4a278749d ] A problem about i2c-npcm7xx create debugfs failed is triggered with the following log given: [ 173.827310] debugfs: Directory 'npcm_i2c' with parent '/' already present! 
The reason is that npcm_i2c_init() returns platform_driver_register() directly without checking its return value; if platform_driver_register() fails, it returns without destroying the newly created debugfs directory, so the npcm_i2c debugfs directory can never be created later. npcm_i2c_init() debugfs_create_dir() # create debugfs directory platform_driver_register() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when platform_driver_register() returns error. Fixes: 56a1485b102e ("i2c: npcm7xx: Add Nuvoton NPCM I2C controller driver") Signed-off-by: Yuan Can Reviewed-by: Tali Perry Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-npcm7xx.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 31e3d2c9d6bc..c1b679737240 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2362,8 +2362,17 @@ static struct platform_driver npcm_i2c_bus_driver = { static int __init npcm_i2c_init(void) { + int ret; + npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL); - return platform_driver_register(&npcm_i2c_bus_driver); + + ret = platform_driver_register(&npcm_i2c_bus_driver); + if (ret) { + debugfs_remove_recursive(npcm_i2c_debugfs_dir); + return ret; + } + + return 0; } module_init(npcm_i2c_init); From 63e72417a1ad00ab1eaa2e550529c6c043f40515 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 10 Nov 2022 00:59:02 +0100 Subject: [PATCH 1179/1477] i2c: imx: Only DMA messages with I2C_M_DMA_SAFE flag set [ Upstream commit d36678f7905cbd1dc55a8a96e066dafd749d4600 ] Recent changes to the DMA code have resulted in the IMX driver failing I2C transfers when the buffer has been vmalloc'ed. Only perform DMA transfers if the message has the I2C_M_DMA_SAFE flag set, indicating the client is providing a buffer which is DMA safe.
This is a minimal fix for stable. The I2C core provides helpers to allocate a bounce buffer. For a fuller fix the master should make use of these helpers. Fixes: 4544b9f25e70 ("dma-mapping: Add vmap checks to dma_map_single()") Signed-off-by: Andrew Lunn Acked-by: Oleksij Rempel Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-imx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index be4ad516293b..b4fb4336b4e8 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -843,7 +843,8 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, int i, result; unsigned int temp; int block_data = msgs->flags & I2C_M_RECV_LEN; - int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data; + int use_dma = i2c_imx->dma && msgs->flags & I2C_M_DMA_SAFE && + msgs->len >= DMA_THRESHOLD && !block_data; dev_dbg(&i2c_imx->adapter.dev, "<%s> write slave address: addr=0x%x\n", @@ -1011,7 +1012,8 @@ static int i2c_imx_xfer_common(struct i2c_adapter *adapter, result = i2c_imx_read(i2c_imx, &msgs[i], is_lastmsg, atomic); } else { if (!atomic && - i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD) + i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD && + msgs[i].flags & I2C_M_DMA_SAFE) result = i2c_imx_dma_write(i2c_imx, &msgs[i]); else result = i2c_imx_write(i2c_imx, &msgs[i], atomic); From f3b76b4d38fd455e95bc6aef917220b354b8b826 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 16 Nov 2022 16:37:36 -0700 Subject: [PATCH 1180/1477] ACPI: HMAT: remove unnecessary variable initialization [ Upstream commit 14f16d47561ba9249efc6c2db9d47ed56841f070 ] In hmat_register_target_initiators(), the variable 'best' gets initialized in the outer per-locality-type for loop. The initialization just before setting up 'Access 1' targets was unnecessary. Remove it. Cc: Rafael J. Wysocki Cc: Liu Shixin Cc: Dan Williams Acked-by: Kirill A. 
Shutemov Acked-by: Rafael J. Wysocki Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20221116-acpi_hmat_fix-v2-1-3712569be691@intel.com Signed-off-by: Dan Williams Stable-dep-of: 48d4180939e1 ("ACPI: HMAT: Fix initiator registration for single-initiator systems") Signed-off-by: Sasha Levin --- drivers/acpi/numa/hmat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 137a5dd880c2..fd202689dcda 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -645,7 +645,6 @@ static void hmat_register_target_initiators(struct memory_target *target) /* Access 1 ignores Generic Initiators */ bitmap_zero(p_nodes, MAX_NUMNODES); list_sort(p_nodes, &initiators, initiator_cmp); - best = 0; for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; if (!loc) From f075cf139f558fbfe56e9ae0ade8422d01961c3c Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 16 Nov 2022 16:37:37 -0700 Subject: [PATCH 1181/1477] ACPI: HMAT: Fix initiator registration for single-initiator systems [ Upstream commit 48d4180939e12c4bd2846f984436d895bb9699ed ] In a system with a single initiator node, and one or more memory-only 'target' nodes, the memory-only node(s) would fail to register their initiator node correctly. i.e. in sysfs: # ls /sys/devices/system/node/node0/access0/targets/ node0 Whereas the correct behavior should be: # ls /sys/devices/system/node/node0/access0/targets/ node0 node1 This happened because hmat_register_target_initiators() uses list_sort() to sort the initiator list, but the sort comparison function (initiator_cmp()) is overloaded to also set the node mask's bits. In a system with a single initiator, the list is singular, and list_sort elides the comparison helper call. Thus the node mask never gets set, and the subsequent search for the best initiator comes up empty.
Add a new helper to consume the sorted initiator list, and generate the nodemask, decoupling it from the overloaded initiator_cmp() comparison callback. This prevents the singular list corner case naturally, and makes the code easier to follow as well. Cc: Cc: Rafael J. Wysocki Cc: Liu Shixin Cc: Dan Williams Cc: Kirill A. Shutemov Reported-by: Chris Piper Signed-off-by: Vishal Verma Acked-by: Rafael J. Wysocki Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20221116-acpi_hmat_fix-v2-2-3712569be691@intel.com Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/acpi/numa/hmat.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index fd202689dcda..26453a945da4 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -563,17 +563,26 @@ static int initiator_cmp(void *priv, const struct list_head *a, { struct memory_initiator *ia; struct memory_initiator *ib; - unsigned long *p_nodes = priv; ia = list_entry(a, struct memory_initiator, node); ib = list_entry(b, struct memory_initiator, node); - set_bit(ia->processor_pxm, p_nodes); - set_bit(ib->processor_pxm, p_nodes); - return ia->processor_pxm - ib->processor_pxm; } +static int initiators_to_nodemask(unsigned long *p_nodes) +{ + struct memory_initiator *initiator; + + if (list_empty(&initiators)) + return -ENXIO; + + list_for_each_entry(initiator, &initiators, node) + set_bit(initiator->processor_pxm, p_nodes); + + return 0; +} + static void hmat_register_target_initiators(struct memory_target *target) { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); @@ -610,7 +619,10 @@ static void hmat_register_target_initiators(struct memory_target *target) * initiators.
*/ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); + list_sort(NULL, &initiators, initiator_cmp); + if (initiators_to_nodemask(p_nodes) < 0) + return; + if (!access0done) { for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; @@ -644,7 +656,9 @@ static void hmat_register_target_initiators(struct memory_target *target) /* Access 1 ignores Generic Initiators */ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); + if (initiators_to_nodemask(p_nodes) < 0) + return; + for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; if (!loc) From 5a6f935ef34e900a97064b070df4c945de631c2e Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 22 Nov 2022 12:16:21 +0000 Subject: [PATCH 1182/1477] Revert "clocksource/drivers/riscv: Events are stopped during CPU suspend" [ Upstream commit d9f15a9de44affe733e34f93bc184945ba277e6d ] This reverts commit 232ccac1bd9b5bfe73895f527c08623e7fa0752d. On the subject of suspend, the RISC-V SBI spec states: This does not cover whether any given events actually reach the hart or not, just what the hart will do if it receives an event. On PolarFire SoC, and potentially other SiFive based implementations, events from the RISC-V timer do reach a hart during suspend. This is not the case for the implementation on the Allwinner D1 - there timer events are not received during suspend. To fix this, the CLOCK_EVT_FEAT_C3STOP (mis)feature was enabled for the timer driver - but this has broken both RCU stall detection and timers generally on PolarFire SoC and potentially other SiFive based implementations. 
If an AXI read to the PCIe controller on PolarFire SoC times out, the system will stall, however, with CLOCK_EVT_FEAT_C3STOP active, the system just locks up without RCU stalling: io scheduler mq-deadline registered io scheduler kyber registered microchip-pcie 2000000000.pcie: host bridge /soc/pcie@2000000000 ranges: microchip-pcie 2000000000.pcie: MEM 0x2008000000..0x2087ffffff -> 0x0008000000 microchip-pcie 2000000000.pcie: sec error in pcie2axi buffer microchip-pcie 2000000000.pcie: ded error in pcie2axi buffer microchip-pcie 2000000000.pcie: axi read request error microchip-pcie 2000000000.pcie: axi read timeout microchip-pcie 2000000000.pcie: sec error in pcie2axi buffer microchip-pcie 2000000000.pcie: ded error in pcie2axi buffer microchip-pcie 2000000000.pcie: sec error in pcie2axi buffer microchip-pcie 2000000000.pcie: ded error in pcie2axi buffer microchip-pcie 2000000000.pcie: sec error in pcie2axi buffer microchip-pcie 2000000000.pcie: ded error in pcie2axi buffer Freeing initrd memory: 7332K Similar issues were reported with clock_nanosleep() - with a test app that sleeps each cpu for 6, 5, 4, 3 ms respectively, HZ=250 & the blamed commit in place, the sleep times are rounded up to the next jiffy: == CPU: 1 == == CPU: 2 == == CPU: 3 == == CPU: 4 == Mean: 7.974992 Mean: 7.976534 Mean: 7.962591 Mean: 3.952179 Std Dev: 0.154374 Std Dev: 0.156082 Std Dev: 0.171018 Std Dev: 0.076193 Hi: 9.472000 Hi: 10.495000 Hi: 8.864000 Hi: 4.736000 Lo: 6.087000 Lo: 6.380000 Lo: 4.872000 Lo: 3.403000 Samples: 521 Samples: 521 Samples: 521 Samples: 521 Fortunately, the D1 has a second timer, which is "currently used in preference to the RISC-V/SBI timer driver" so a revert here does not hurt operation of D1 in its current form. Ultimately, a DeviceTree property (or node) will be added to encode the behaviour of the timers, but until then revert the addition of CLOCK_EVT_FEAT_C3STOP.
Fixes: 232ccac1bd9b ("clocksource/drivers/riscv: Events are stopped during CPU suspend") Signed-off-by: Conor Dooley Signed-off-by: Thomas Gleixner Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Acked-by: Samuel Holland Link: https://lore.kernel.org/linux-riscv/YzYTNQRxLr7Q9JR0@spud/ Link: https://github.com/riscv-non-isa/riscv-sbi-doc/issues/98/ Link: https://lore.kernel.org/linux-riscv/bf6d3b1f-f703-4a25-833e-972a44a04114@sholland.org/ Link: https://lore.kernel.org/r/20221122121620.3522431-1-conor.dooley@microchip.com Signed-off-by: Sasha Levin --- drivers/clocksource/timer-riscv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 0e7748df4be3..c51c5ed15aa7 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -32,7 +32,7 @@ static int riscv_clock_next_event(unsigned long delta, static unsigned int riscv_clock_event_irq; static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { .name = "riscv_timer_clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, + .features = CLOCK_EVT_FEAT_ONESHOT, .rating = 100, .set_next_event = riscv_clock_next_event, }; From 4e0d6c687c925e27fd4bc78a2721d10acf5614d6 Mon Sep 17 00:00:00 2001 From: Jan Dabros Date: Mon, 28 Nov 2022 20:56:51 +0100 Subject: [PATCH 1183/1477] char: tpm: Protect tpm_pm_suspend with locks commit 23393c6461422df5bf8084a086ada9a7e17dc2ba upstream. Currently tpm transactions are executed unconditionally in tpm_pm_suspend() function, which may lead to races with other tpm accessors in the system. 
Specifically, the hw_random tpm driver makes use of tpm_get_random(), and this function is called in a loop from a kthread, which means it's not frozen alongside userspace, and so can race with the work done during system suspend: tpm tpm0: tpm_transmit: tpm_recv: error -52 tpm tpm0: invalid TPM_STS.x 0xff, dumping stack for forensics CPU: 0 PID: 1 Comm: init Not tainted 6.1.0-rc5+ #135 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-20220807_005459-localhost 04/01/2014 Call Trace: tpm_tis_status.cold+0x19/0x20 tpm_transmit+0x13b/0x390 tpm_transmit_cmd+0x20/0x80 tpm1_pm_suspend+0xa6/0x110 tpm_pm_suspend+0x53/0x80 __pnp_bus_suspend+0x35/0xe0 __device_suspend+0x10f/0x350 Fix this by calling tpm_try_get_ops(), which itself is a wrapper around tpm_chip_start(), but takes the appropriate mutex. Signed-off-by: Jan Dabros Reported-by: Vlastimil Babka Tested-by: Jason A. Donenfeld Tested-by: Vlastimil Babka Link: https://lore.kernel.org/all/c5ba47ef-393f-1fba-30bd-1230d1b4b592@suse.cz/ Cc: stable@vger.kernel.org Fixes: e891db1a18bf ("tpm: turn on TPM on suspend for TPM 1.x") [Jason: reworked commit message, added metadata] Signed-off-by: Jason A. 
Donenfeld Reviewed-by: Jarkko Sakkinen Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-interface.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 1621ce818705..d69905233aff 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -401,13 +401,14 @@ int tpm_pm_suspend(struct device *dev) !pm_suspend_via_firmware()) goto suspended; - if (!tpm_chip_start(chip)) { + rc = tpm_try_get_ops(chip); + if (!rc) { if (chip->flags & TPM_CHIP_FLAG_TPM2) tpm2_shutdown(chip, TPM2_SU_STATE); else rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); - tpm_chip_stop(chip); + tpm_put_ops(chip); } suspended: From a82869ac52f3d9db4b2cf8fd41edc2dee7a75a61 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Fri, 2 Dec 2022 15:37:46 -0800 Subject: [PATCH 1184/1477] Input: raydium_ts_i2c - fix memory leak in raydium_i2c_send() commit 8c9a59939deb4bfafdc451100c03d1e848b4169b upstream. There is a kmemleak when test the raydium_i2c_ts with bpf mock device: unreferenced object 0xffff88812d3675a0 (size 8): comm "python3", pid 349, jiffies 4294741067 (age 95.695s) hex dump (first 8 bytes): 11 0e 10 c0 01 00 04 00 ........ 
backtrace: [<0000000068427125>] __kmalloc+0x46/0x1b0 [<0000000090180f91>] raydium_i2c_send+0xd4/0x2bf [raydium_i2c_ts] [<000000006e631aee>] raydium_i2c_initialize.cold+0xbc/0x3e4 [raydium_i2c_ts] [<00000000dc6fcf38>] raydium_i2c_probe+0x3cd/0x6bc [raydium_i2c_ts] [<00000000a310de16>] i2c_device_probe+0x651/0x680 [<00000000f5a96bf3>] really_probe+0x17c/0x3f0 [<00000000096ba499>] __driver_probe_device+0xe3/0x170 [<00000000c5acb4d9>] driver_probe_device+0x49/0x120 [<00000000264fe082>] __device_attach_driver+0xf7/0x150 [<00000000f919423c>] bus_for_each_drv+0x114/0x180 [<00000000e067feca>] __device_attach+0x1e5/0x2d0 [<0000000054301fc2>] bus_probe_device+0x126/0x140 [<00000000aad93b22>] device_add+0x810/0x1130 [<00000000c086a53f>] i2c_new_client_device+0x352/0x4e0 [<000000003c2c248c>] of_i2c_register_device+0xf1/0x110 [<00000000ffec4177>] of_i2c_notify+0x100/0x160 unreferenced object 0xffff88812d3675c8 (size 8): comm "python3", pid 349, jiffies 4294741070 (age 95.692s) hex dump (first 8 bytes): 22 00 36 2d 81 88 ff ff ".6-.... 
backtrace: [<0000000068427125>] __kmalloc+0x46/0x1b0 [<0000000090180f91>] raydium_i2c_send+0xd4/0x2bf [raydium_i2c_ts] [<000000001d5c9620>] raydium_i2c_initialize.cold+0x223/0x3e4 [raydium_i2c_ts] [<00000000dc6fcf38>] raydium_i2c_probe+0x3cd/0x6bc [raydium_i2c_ts] [<00000000a310de16>] i2c_device_probe+0x651/0x680 [<00000000f5a96bf3>] really_probe+0x17c/0x3f0 [<00000000096ba499>] __driver_probe_device+0xe3/0x170 [<00000000c5acb4d9>] driver_probe_device+0x49/0x120 [<00000000264fe082>] __device_attach_driver+0xf7/0x150 [<00000000f919423c>] bus_for_each_drv+0x114/0x180 [<00000000e067feca>] __device_attach+0x1e5/0x2d0 [<0000000054301fc2>] bus_probe_device+0x126/0x140 [<00000000aad93b22>] device_add+0x810/0x1130 [<00000000c086a53f>] i2c_new_client_device+0x352/0x4e0 [<000000003c2c248c>] of_i2c_register_device+0xf1/0x110 [<00000000ffec4177>] of_i2c_notify+0x100/0x160 After BANK_SWITCH command from i2c BUS, no matter success or error happened, the tx_buf should be freed. Fixes: 3b384bd6c3f2 ("Input: raydium_ts_i2c - do not split tx transactions") Signed-off-by: Zhang Xiaoxu Link: https://lore.kernel.org/r/20221202103412.2120169-1-zhangxiaoxu5@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/raydium_i2c_ts.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 4d2d22a86977..bdb3e2c3ab79 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -210,12 +210,14 @@ static int raydium_i2c_send(struct i2c_client *client, error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer)); if (likely(!error)) - return 0; + goto out; msleep(RM_RETRY_DELAY_MS); } while (++tries < RM_MAX_RETRIES); dev_err(&client->dev, "%s failed: %d\n", __func__, error); +out: + kfree(tx_buf); return error; } From 5f2f775605917aa5087ee5c6f3339da94c100679 Mon Sep 
17 00:00:00 2001 From: Ming Lei Date: Mon, 5 Dec 2022 21:27:39 +0800 Subject: [PATCH 1185/1477] block: unhash blkdev part inode when the part is deleted v5.11 changes the blkdev lookup mechanism completely since commit 22ae8ce8b892 ("block: simplify bdev/disk lookup in blkdev_get"), and a small part of the change is to unhash part bdev inode when deleting partition. Turns out this kind of change does fix one nasty issue in case of BLOCK_EXT_MAJOR: 1) when one partition is deleted & closed, disk_put_part() is always called before bdput(bdev), see blkdev_put(); so the part's devt can be freed & re-used before the inode is dropped 2) then new partition with same devt can be created just before the inode in 1) is dropped, then the old inode/bdev structure in 1) is re-used for this new partition, this way causes use-after-free and kernel panic. It isn't possible to backport the whole big patchset of "merge struct block_device and struct hd_struct v4" for addressing this issue. https://lore.kernel.org/linux-block/20201128161510.347752-1-hch@lst.de/ So fix it by unhashing part bdev in delete_partition(), and this way is actually aligned with v5.11+'s behavior. Reported-by: Shiwei Cui Tested-by: Shiwei Cui Cc: Christoph Hellwig Cc: Jan Kara Signed-off-by: Ming Lei Acked-by: Jens Axboe --- block/partitions/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/partitions/core.c b/block/partitions/core.c index a02e22411594..e3d61ec4a5a6 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -329,6 +329,7 @@ void delete_partition(struct hd_struct *part) struct gendisk *disk = part_to_disk(part); struct disk_part_tbl *ptbl = rcu_dereference_protected(disk->part_tbl, 1); + struct block_device *bdev; /* * ->part_tbl is referenced in this part's release handler, so @@ -346,6 +347,12 @@ void delete_partition(struct hd_struct *part) * "in-use" until we really free the gendisk.
*/ blk_invalidate_devt(part_devt(part)); + + bdev = bdget_part(part); + if (bdev) { + remove_inode_hash(bdev->bd_inode); + bdput(bdev); + } percpu_ref_kill(&part->ref); } From 4aa32aaef6c1b5e39ae2508ec596bd7b67871043 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 5 Dec 2022 11:33:40 -0800 Subject: [PATCH 1186/1477] proc: avoid integer type confusion in get_proc_long commit e6cfaf34be9fcd1a8285a294e18986bfc41a409c upstream. proc_get_long() is passed a size_t, but then assigns it to an 'int' variable for the length. Let's not do that, even if our IO paths are limited to MAX_RW_COUNT (exactly because of these kinds of type errors). So do the proper test in the right type. Reported-by: Kyle Zeng Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sysctl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f0dd1a3b66eb..6faf40f1b7b9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -466,13 +466,12 @@ static int proc_get_long(char **buf, size_t *size, unsigned long *val, bool *neg, const char *perm_tr, unsigned perm_tr_len, char *tr) { - int len; char *p, tmp[TMPBUFLEN]; + ssize_t len = *size; - if (!*size) + if (len <= 0) return -EINVAL; - len = *size; if (len > TMPBUFLEN - 1) len = TMPBUFLEN - 1; From 9ba389863ac63032d4b6ffad2c90a62cd78082ee Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 5 Dec 2022 12:09:06 -0800 Subject: [PATCH 1187/1477] proc: proc_skip_spaces() shouldn't think it is working on C strings commit bce9332220bd677d83b19d21502776ad555a0e73 upstream. proc_skip_spaces() seems to think it is working on C strings, and ends up being just a wrapper around skip_spaces() with a really odd calling convention. Instead of basing it on skip_spaces(), it should have looked more like proc_skip_char(), which really is the exact same function (except it skips a particular character, rather than whitespace). So use that as inspiration, odd coding and all.
Now the calling convention actually makes sense and works for the intended purpose. Reported-and-tested-by: Kyle Zeng Acked-by: Eric Dumazet Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sysctl.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6faf40f1b7b9..3eb527f8a269 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -391,13 +391,14 @@ int proc_dostring(struct ctl_table *table, int write, ppos); } -static size_t proc_skip_spaces(char **buf) +static void proc_skip_spaces(char **buf, size_t *size) { - size_t ret; - char *tmp = skip_spaces(*buf); - ret = tmp - *buf; - *buf = tmp; - return ret; + while (*size) { + if (!isspace(**buf)) + break; + (*size)--; + (*buf)++; + } } static void proc_skip_char(char **buf, size_t *size, const char v) @@ -629,7 +630,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, bool neg; if (write) { - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -656,7 +657,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? 
: -EINVAL; *lenp -= left; @@ -698,7 +699,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, if (left > PAGE_SIZE - 1) left = PAGE_SIZE - 1; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) { err = -EINVAL; goto out_free; @@ -718,7 +719,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, } if (!err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); out_free: if (err) @@ -1176,7 +1177,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (write) { bool neg; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -1204,7 +1205,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? : -EINVAL; *lenp -= left; From d072a10c81d3a4f2308e24ffb4543a9146946373 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 30 Nov 2022 16:10:52 -0800 Subject: [PATCH 1188/1477] v4l2: don't fall back to follow_pfn() if pin_user_pages_fast() fails commit 6647e76ab623b2b3fb2efe03a86e9c9046c52c33 upstream. The V4L2_MEMORY_USERPTR interface is long deprecated and shouldn't be used (and is discouraged for any modern v4l drivers). And Seth Jenkins points out that the fallback to VM_PFNMAP/VM_IO is fundamentally racy and dangerous. Note that it's not even a case that should trigger, since any normal user pointer logic ends up just using the pin_user_pages_fast() call that does the proper page reference counting. That's not the problem case, only if you try to use special device mappings do you have any issues. Normally I'd just remove this during the merge window, but since Seth pointed out the problem cases, we really want to know as soon as possible if there are actually any users of this odd special case of a legacy interface. 
Neither Hans nor Mauro seem to think that such mis-uses of the old legacy interface should exist. As Mauro says: "See, V4L2 has actually 4 streaming APIs: - Kernel-allocated mmap (usually referred simply as just mmap); - USERPTR mmap; - read(); - dmabuf; The USERPTR is one of the oldest way to use it, coming from V4L version 1 times, and by far the least used one" And Hans chimed in on the USERPTR interface: "To be honest, I wouldn't mind if it goes away completely, but that's a bit of a pipe dream right now" but while removing this legacy interface entirely may be a pipe dream we can at least try to remove the unlikely (and actively broken) case of using special device mappings for USERPTR accesses. This replaces it with a WARN_ONCE() that we can remove once we've hopefully confirmed that no actual users exist. NOTE! Longer term, this means that a 'struct frame_vector' only ever contains proper page pointers, and all the games we have with converting them to pages can go away (grep for 'frame_vector_to_pages()' and the uses of 'vec->is_pfns'). But this is just the first step, to verify that this code really is all dead, and do so as quickly as possible. 
Reported-by: Seth Jenkins Acked-by: Hans Verkuil Acked-by: Mauro Carvalho Chehab Cc: David Hildenbrand Cc: Jan Kara Signed-off-by: Linus Torvalds Signed-off-by: Sergey Senozhatsky Signed-off-by: Greg Kroah-Hartman --- mm/frame_vector.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 10f82d5643b6..0e589a9a8801 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -37,7 +37,6 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int ret = 0; - int err; int locked; if (nr_frames == 0) @@ -74,32 +73,14 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, vec->is_pfns = false; ret = pin_user_pages_locked(start, nr_frames, gup_flags, (struct page **)(vec->ptrs), &locked); - goto out; + if (likely(ret > 0)) + goto out; } - vec->got_ref = false; - vec->is_pfns = true; - do { - unsigned long *nums = frame_vector_pfns(vec); + /* This used to (racily) return non-refcounted pfns. Let people know */ + WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping"); + vec->nr_frames = 0; - while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { - err = follow_pfn(vma, start, &nums[ret]); - if (err) { - if (ret == 0) - ret = err; - goto out; - } - start += PAGE_SIZE; - ret++; - } - /* - * We stop if we have enough pages or if VMA doesn't completely - * cover the tail page. 
- */ - if (ret >= nr_frames || start < vma->vm_end) - break; - vma = find_vma_intersection(mm, start, start + 1); - } while (vma && vma->vm_flags & (VM_IO | VM_PFNMAP)); out: if (locked) mmap_read_unlock(mm); From cc1b4718cc42d298fcc923d55d19c03ecdadbaae Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 5 Dec 2022 17:59:27 +0100 Subject: [PATCH 1189/1477] ipc/sem: Fix dangling sem_array access in semtimedop race [ Upstream commit b52be557e24c47286738276121177a41f54e3b83 ] When __do_semtimedop() goes to sleep because it has to wait for a semaphore value becoming zero or becoming bigger than some threshold, it links the on-stack sem_queue to the sem_array, then goes to sleep without holding a reference on the sem_array. When __do_semtimedop() comes back out of sleep, one of two things must happen: a) We prove that the on-stack sem_queue has been disconnected from the (possibly freed) sem_array, making it safe to return from the stack frame that the sem_queue exists in. b) We stabilize our reference to the sem_array, lock the sem_array, and detach the sem_queue from the sem_array ourselves. sem_array has RCU lifetime, so for case (b), the reference can be stabilized inside an RCU read-side critical section by locklessly checking whether the sem_queue is still connected to the sem_array. However, the current code does the lockless check on sem_queue before starting an RCU read-side critical section, so the result of the lockless check immediately becomes useless. Fix it by doing rcu_read_lock() before the lockless check. Now RCU ensures that if we observe the object being on our queue, the object can't be freed until rcu_read_unlock(). This bug is only hittable on kernel builds with full preemption support (either CONFIG_PREEMPT or PREEMPT_DYNAMIC with preempt=full). 
Fixes: 370b262c896e ("ipc/sem: avoid idr tree lookup for interrupted semop") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- ipc/sem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ipc/sem.c b/ipc/sem.c index 2cb6515ef1dd..916f7a90be31 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2190,14 +2190,15 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, * scenarios where we were awakened externally, during the * window between wake_q_add() and wake_up_q(). */ + rcu_read_lock(); error = READ_ONCE(queue.status); if (error != -EINTR) { /* see SEM_BARRIER_2 for purpose/pairing */ smp_acquire__after_ctrl_dep(); + rcu_read_unlock(); goto out_free; } - rcu_read_lock(); locknum = sem_lock(sma, sops, nsops); if (!ipc_valid_object(&sma->sem_perm)) From 592346d5dc9b61e7fb4a3876ec498aa96ee11ac8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Dec 2022 11:24:00 +0100 Subject: [PATCH 1190/1477] Linux 5.10.158 Link: https://lore.kernel.org/r/20221205190803.464934752@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Pavel Machek (CIP) Tested-by: Salvatore Bonaccorso Tested-by: Rudi Heitbaum Tested-by: Hulk Robot Link: https://lore.kernel.org/r/20221206124048.850573317@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Florian Fainelli Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Tested-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bf22df29c4d8..f3d1f07b6a6f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 157 +SUBLEVEL = 158 EXTRAVERSION = NAME = Dare mighty things From 0b0939466f8c14bbba29828903883afe3c58e9a8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Dec 2022 17:57:07 +0000 
Subject: [PATCH 1191/1477] ANDROID: gki_defconfig: add CONFIG_FUNCTION_ERROR_INJECTION Commit c099d12c5502 ("error-injection: Add prompt for function error injection") removed the "default y" selection for this option, so it needs to be added manually to the gki_defconfig files in order to preserve the option, AND to keep the stable ABI. Fixes: c099d12c5502 ("error-injection: Add prompt for function error injection") Signed-off-by: Greg Kroah-Hartman Change-Id: Ie36b7bab650356d1bf24625961adf33a725258d2 --- arch/arm64/configs/gki_defconfig | 1 + arch/x86/configs/gki_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index cb0cf189b644..73696b1d7160 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -695,4 +695,5 @@ CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_TRACE_MMIO_ACCESS=y CONFIG_TRACEFS_DISABLE_AUTOMOUNT=y CONFIG_HIST_TRIGGERS=y +CONFIG_FUNCTION_ERROR_INJECTION=y # CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 9d72cd46bc3b..b648dd91999b 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -625,3 +625,4 @@ CONFIG_SCHEDSTATS=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_TRACEFS_DISABLE_AUTOMOUNT=y CONFIG_UNWINDER_FRAME_POINTER=y +CONFIG_FUNCTION_ERROR_INJECTION=y From 202ee063496e2050f55e508ba2c03be1444a6dea Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Dec 2022 18:02:57 +0000 Subject: [PATCH 1192/1477] Revert "mmc: sdhci: Fix voltage switch delay" This reverts commit 940b774069f164b0c6f25b13946f1ef166e8d439 which is commit c981cdfb9925f64a364f13c2b4f98f877308a408 upstream. It breaks the Android ABI so revert it for now. If this is needed in the future, it can be brought back in an ABI safe way. 
Bug: 161946584 Change-Id: Ia5d24c9adcb06166546c42810759b9193d4eaceb Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 61 +++++----------------------------------- drivers/mmc/host/sdhci.h | 2 -- 2 files changed, 7 insertions(+), 56 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 01dab864bab6..1be23bddf1b2 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -341,7 +341,6 @@ static void sdhci_init(struct sdhci_host *host, int soft) if (soft) { /* force clock reconfiguration */ host->clock = 0; - host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } } @@ -2249,46 +2248,11 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing) } EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling); -static bool sdhci_timing_has_preset(unsigned char timing) -{ - switch (timing) { - case MMC_TIMING_UHS_SDR12: - case MMC_TIMING_UHS_SDR25: - case MMC_TIMING_UHS_SDR50: - case MMC_TIMING_UHS_SDR104: - case MMC_TIMING_UHS_DDR50: - case MMC_TIMING_MMC_DDR52: - return true; - }; - return false; -} - -static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing) -{ - return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && - sdhci_timing_has_preset(timing); -} - -static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios) -{ - /* - * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK - * Frequency. Check if preset values need to be enabled, or the Driver - * Strength needs updating. Note, clock changes are handled separately. 
- */ - return !host->preset_enabled && - (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type); -} - void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct sdhci_host *host = mmc_priv(mmc); - bool reinit_uhs = host->reinit_uhs; - bool turning_on_clk = false; u8 ctrl; - host->reinit_uhs = false; - if (ios->power_mode == MMC_POWER_UNDEFINED) return; @@ -2314,8 +2278,6 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) sdhci_enable_preset_value(host, false); if (!ios->clock || ios->clock != host->clock) { - turning_on_clk = ios->clock && !host->clock; - host->ops->set_clock(host, ios->clock); host->clock = ios->clock; @@ -2342,17 +2304,6 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_bus_width(host, ios->bus_width); - /* - * Special case to avoid multiple clock changes during voltage - * switching. - */ - if (!reinit_uhs && - turning_on_clk && - host->timing == ios->timing && - host->version >= SDHCI_SPEC_300 && - !sdhci_presetable_values_change(host, ios)) - return; - ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) { @@ -2396,7 +2347,6 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); - host->drv_type = ios->drv_type; } else { /* * According to SDHC Spec v3.00, if the Preset Value @@ -2424,14 +2374,19 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_uhs_signaling(host, ios->timing); host->timing = ios->timing; - if (sdhci_preset_needed(host, ios->timing)) { + if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && + ((ios->timing == MMC_TIMING_UHS_SDR12) || + (ios->timing == MMC_TIMING_UHS_SDR25) || + (ios->timing == MMC_TIMING_UHS_SDR50) || + (ios->timing == MMC_TIMING_UHS_SDR104) || + (ios->timing == MMC_TIMING_UHS_DDR50) || + (ios->timing == MMC_TIMING_MMC_DDR52))) { u16 preset; sdhci_enable_preset_value(host, true); preset = 
sdhci_get_preset_value(host); ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK, preset); - host->drv_type = ios->drv_type; } /* Re-enable SD Clock */ @@ -3747,7 +3702,6 @@ int sdhci_resume_host(struct sdhci_host *host) sdhci_init(host, 0); host->pwr = 0; host->clock = 0; - host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } else { sdhci_init(host, (host->mmc->pm_flags & MMC_PM_KEEP_POWER)); @@ -3810,7 +3764,6 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset) /* Force clock and power re-program */ host->pwr = 0; host->clock = 0; - host->reinit_uhs = true; mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios); mmc->ops->set_ios(mmc, &mmc->ios); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 67e65ab91479..15ecf856a7c7 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -520,8 +520,6 @@ struct sdhci_host { unsigned int clock; /* Current clock (MHz) */ u8 pwr; /* Current voltage */ - u8 drv_type; /* Current UHS-I driver type */ - bool reinit_uhs; /* Force UHS-related re-initialization */ bool runtime_suspended; /* Host is runtime suspended */ bool bus_on; /* Bus power prevents runtime suspend */ From 3ada63a87654e67e0b445a639ff382ca7edbd221 Mon Sep 17 00:00:00 2001 From: FUKAUMI Naoki Date: Sat, 24 Sep 2022 11:28:12 +0000 Subject: [PATCH 1193/1477] arm64: dts: rockchip: keep I2S1 disabled for GPIO function on ROCK Pi 4 series [ Upstream commit 849c19d14940b87332d5d59c7fc581d73f2099fd ] I2S1 pins are exposed on 40-pin header on Radxa ROCK Pi 4 series. their default function is GPIO, so I2S1 need to be disabled. 
Signed-off-by: FUKAUMI Naoki Link: https://lore.kernel.org/r/20220924112812.1219-1-naoki@radxa.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index f121203081b9..64df64339119 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -448,7 +448,6 @@ &i2s1 { rockchip,playback-channels = <2>; rockchip,capture-channels = <2>; - status = "okay"; }; &i2s2 { From 3e0c4667713a9249c2bd550a1f4eced2f31f7334 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 24 Oct 2022 18:55:46 +0200 Subject: [PATCH 1194/1477] arm: dts: rockchip: fix node name for hym8563 rtc [ Upstream commit 17b57beafccb4569accbfc8c11390744cf59c021 ] Fix the node name for hym8563 in all arm rockchip devicetrees. Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20221024165549.74574-4-sebastian.reichel@collabora.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rk3036-evb.dts | 2 +- arch/arm/boot/dts/rk3288-evb-act8846.dts | 2 +- arch/arm/boot/dts/rk3288-firefly.dtsi | 2 +- arch/arm/boot/dts/rk3288-miqi.dts | 2 +- arch/arm/boot/dts/rk3288-rock2-square.dts | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts index 2a7e6624efb9..ea23ba98625e 100644 --- a/arch/arm/boot/dts/rk3036-evb.dts +++ b/arch/arm/boot/dts/rk3036-evb.dts @@ -31,7 +31,7 @@ &i2c1 { status = "okay"; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; diff --git a/arch/arm/boot/dts/rk3288-evb-act8846.dts b/arch/arm/boot/dts/rk3288-evb-act8846.dts index be695b8c1f67..8a635c243127 100644 --- a/arch/arm/boot/dts/rk3288-evb-act8846.dts +++ 
b/arch/arm/boot/dts/rk3288-evb-act8846.dts @@ -54,7 +54,7 @@ vin-supply = <&vcc_sys>; }; - hym8563@51 { + rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi index 7fb582302b32..c560afe3af78 100644 --- a/arch/arm/boot/dts/rk3288-firefly.dtsi +++ b/arch/arm/boot/dts/rk3288-firefly.dtsi @@ -233,7 +233,7 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts index cf54d5ffff2f..fe265a834e8e 100644 --- a/arch/arm/boot/dts/rk3288-miqi.dts +++ b/arch/arm/boot/dts/rk3288-miqi.dts @@ -157,7 +157,7 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts index c4d1d142d8c6..d5ef99ebbddc 100644 --- a/arch/arm/boot/dts/rk3288-rock2-square.dts +++ b/arch/arm/boot/dts/rk3288-rock2-square.dts @@ -165,7 +165,7 @@ }; &i2c0 { - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; From 060d58924af64f8d55a9ab33e0888410623742be Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Thu, 27 Oct 2022 10:58:22 +0200 Subject: [PATCH 1195/1477] ARM: dts: rockchip: fix ir-receiver node names [ Upstream commit dd847fe34cdf1e89afed1af24986359f13082bfb ] Fix ir-receiver node names on Rockchip boards, so that they match with regex: '^ir(-receiver)?(@[a-f0-9]+)?$' Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/ea5af279-f44c-afea-023d-bb37f5a0d58d@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rk3188-radxarock.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts index 
b0fef82c0a71..39b913f8d701 100644 --- a/arch/arm/boot/dts/rk3188-radxarock.dts +++ b/arch/arm/boot/dts/rk3188-radxarock.dts @@ -67,7 +67,7 @@ #sound-dai-cells = <0>; }; - ir_recv: gpio-ir-receiver { + ir_recv: ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; From 5f9474d07b60d1e63a306967ceed819af5d78112 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Thu, 27 Oct 2022 10:59:10 +0200 Subject: [PATCH 1196/1477] arm64: dts: rockchip: fix ir-receiver node names [ Upstream commit de0d04b9780a23eb928aedfb6f981285f78d58e5 ] Fix ir-receiver node names on Rockchip boards, so that they match with regex: '^ir(-receiver)?(@[a-f0-9]+)?$' Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/e9764253-8ce8-150b-4820-41f03f845469@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index fbcb9531cc70..213c0759c4b8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -13,7 +13,7 @@ stdout-path = "serial2:1500000n8"; }; - ir_rx { + ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; From b1f40a0cdf00f2987563a7393ed38d5255f37f9b Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Thu, 27 Oct 2022 01:31:37 +0200 Subject: [PATCH 1197/1477] ARM: dts: rockchip: rk3188: fix lcdc1-rgb24 node name [ Upstream commit 11871e20bcb23c00966e785a124fb72bc8340af4 ] The lcdc1-rgb24 node name is out of line with the rest of the rk3188 lcdc1 node, so fix it. 
Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/7b9c0a6f-626b-07e8-ae74-7e0f08b8d241@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rk3188.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi index b6bde9d12c2b..a837a9a34e3e 100644 --- a/arch/arm/boot/dts/rk3188.dtsi +++ b/arch/arm/boot/dts/rk3188.dtsi @@ -402,7 +402,7 @@ rockchip,pins = <2 RK_PD3 1 &pcfg_pull_none>; }; - lcdc1_rgb24: ldcd1-rgb24 { + lcdc1_rgb24: lcdc1-rgb24 { rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>, <2 RK_PA1 1 &pcfg_pull_none>, <2 RK_PA2 1 &pcfg_pull_none>, From bb1866cf1ee95bf54c255aefe0c570b544d49ba0 Mon Sep 17 00:00:00 2001 From: Tomislav Novak Date: Mon, 26 Sep 2022 16:09:12 +0100 Subject: [PATCH 1198/1477] ARM: 9251/1: perf: Fix stacktraces for tracepoint events in THUMB2 kernels [ Upstream commit 612695bccfdbd52004551308a55bae410e7cd22f ] Store the frame address where arm_get_current_stackframe() looks for it (ARM_r7 instead of ARM_fp if CONFIG_THUMB2_KERNEL=y). Otherwise frame->fp gets set to 0, causing unwind_frame() to fail. # bpftrace -e 't:sched:sched_switch { @[kstack] = count(); exit(); }' Attaching 1 probe... @[ __schedule+1059 ]: 1 A typical first unwind instruction is 0x97 (SP = R7), so after executing it SP ends up being 0 and -URC_FAILURE is returned. unwind_frame(pc = ac9da7d7 lr = 00000000 sp = c69bdda0 fp = 00000000) unwind_find_idx(ac9da7d7) unwind_exec_insn: insn = 00000097 unwind_exec_insn: fp = 00000000 sp = 00000000 lr = 00000000 pc = 00000000 With this patch: # bpftrace -e 't:sched:sched_switch { @[kstack] = count(); exit(); }' Attaching 1 probe... 
@[ __schedule+1059 __schedule+1059 schedule+79 schedule_hrtimeout_range_clock+163 schedule_hrtimeout_range+17 ep_poll+471 SyS_epoll_wait+111 sys_epoll_pwait+231 __ret_fast_syscall+1 ]: 1 Link: https://lore.kernel.org/r/20220920230728.2617421-1-tnovak@fb.com/ Reviewed-by: Linus Walleij Signed-off-by: Tomislav Novak Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/include/asm/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index fe87397c3d8c..bdbc1e590891 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -17,7 +17,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->ARM_pc = (__ip); \ - (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \ + frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \ (regs)->ARM_sp = current_stack_pointer; \ (regs)->ARM_cpsr = SVC_MODE; \ } From dbd78abd696dc0b6c21e2af1d4147c0f559e9519 Mon Sep 17 00:00:00 2001 From: Giulio Benetti Date: Fri, 4 Nov 2022 21:46:18 +0100 Subject: [PATCH 1199/1477] ARM: 9266/1: mm: fix no-MMU ZERO_PAGE() implementation [ Upstream commit 340a982825f76f1cff0daa605970fe47321b5ee7 ] Actually in no-MMU SoCs(i.e. i.MXRT) ZERO_PAGE(vaddr) expands to ``` virt_to_page(0) ``` that in order expands to: ``` pfn_to_page(virt_to_pfn(0)) ``` and then virt_to_pfn(0) to: ``` ((((unsigned long)(0) - PAGE_OFFSET) >> PAGE_SHIFT) + PHYS_PFN_OFFSET) ``` where PAGE_OFFSET and PHYS_PFN_OFFSET are the DRAM offset(0x80000000) and PAGE_SHIFT is 12. This way we obtain 16MB(0x01000000) summed to the base of DRAM(0x80000000). When ZERO_PAGE(0) is then used, for example in bio_add_page(), the page gets an address that is out of DRAM bounds. 
So instead of using fake virtual page 0 let's allocate a dedicated zero_page during paging_init() and assign it to a global 'struct page * empty_zero_page' the same way mmu.c does and it's the same approach used in m68k with commit dc068f462179 as discussed here[0]. Then let's move ZERO_PAGE() definition to the top of pgtable.h to be in common between mmu.c and nommu.c. [0]: https://lore.kernel.org/linux-m68k/2a462b23-5b8e-bbf4-ec7d-778434a3b9d7@google.com/T/#m1266ceb63 ad140743174d6b3070364d3c9a5179b Signed-off-by: Giulio Benetti Reviewed-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/include/asm/pgtable-nommu.h | 6 ------ arch/arm/include/asm/pgtable.h | 16 +++++++++------- arch/arm/mm/nommu.c | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index d16aba48fa0a..090011394477 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -44,12 +44,6 @@ typedef pte_t *pte_addr_t; -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -#define ZERO_PAGE(vaddr) (virt_to_page(0)) - /* * Mark the prot value as uncacheable and unbufferable. */ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index c02f24400369..d38d503493cb 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -10,6 +10,15 @@ #include #include +#ifndef __ASSEMBLY__ +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. 
+ */ +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) +#endif + #ifndef CONFIG_MMU #include @@ -156,13 +165,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define __S111 __PAGE_SHARED_EXEC #ifndef __ASSEMBLY__ -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) - extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 8b3d7191e2b8..959f05701738 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -26,6 +26,13 @@ unsigned long vectors_base; +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + #ifdef CONFIG_ARM_MPU struct mpu_rgn_info mpu_rgn_info; #endif @@ -148,9 +155,21 @@ void __init adjust_lowmem_bounds(void) */ void __init paging_init(const struct machine_desc *mdesc) { + void *zero_page; + early_trap_init((void *)vectors_base); mpu_setup(); + + /* allocate the zero page. */ + zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!zero_page) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + bootmem_init(); + + empty_zero_page = virt_to_page(zero_page); + flush_dcache_page(empty_zero_page); } /* From ddf58f59393bbcf3cefdce0aba669b72cad38ae1 Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Wed, 9 Nov 2022 20:13:54 +0800 Subject: [PATCH 1200/1477] ASoC: wm8962: Wait for updated value of WM8962_CLOCKING1 register [ Upstream commit 3ca507bf99611c82dafced73e921c1b10ee12869 ] DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate correct frequency of LRCLK and BCLK. Sometimes the read-only value can't be updated timely after enabling SYSCLK. This results in wrong calculation values. 
Delay is introduced here to wait for newest value from register. The time of the delay should be at least 500~1000us according to test. Signed-off-by: Chancel Liu Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20221109121354.123958-1-chancel.liu@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8962.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 21574447650c..57aeded978c2 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2489,6 +2489,14 @@ static void wm8962_configure_bclk(struct snd_soc_component *component) snd_soc_component_update_bits(component, WM8962_CLOCKING2, WM8962_SYSCLK_ENA_MASK, WM8962_SYSCLK_ENA); + /* DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate + * correct frequency of LRCLK and BCLK. Sometimes the read-only value + * can't be updated timely after enabling SYSCLK. This results in wrong + * calculation values. Delay is introduced here to wait for newest + * value from register. The time of the delay should be at least + * 500~1000us according to test. + */ + usleep_range(500, 1000); dspclk = snd_soc_component_read(component, WM8962_CLOCKING1); if (snd_soc_component_get_bias_level(component) != SND_SOC_BIAS_ON) From 96b43f36a593cdc6b7984ba8d543b660c0b507a0 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Sun, 30 Oct 2022 21:56:29 +0100 Subject: [PATCH 1201/1477] ARM: dts: rockchip: disable arm_global_timer on rk3066 and rk3188 [ Upstream commit da74858a475782a3f16470907814c8cc5950ad68 ] The clock source and the sched_clock provided by the arm_global_timer on Rockchip rk3066a/rk3188 are quite unstable because their rates depend on the CPU frequency. Recent changes to the arm_global_timer driver makes it impossible to use. 
On the other side, the arm_global_timer has a higher rating than the ROCKCHIP_TIMER, it will be selected by default by the time framework while we want to use the stable Rockchip clock source. Keep the arm_global_timer disabled in order to have the DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/f275ca8d-fd0a-26e5-b978-b7f3df815e0a@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rk3188.dtsi | 1 - arch/arm/boot/dts/rk3xxx.dtsi | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi index a837a9a34e3e..ddf23748ead4 100644 --- a/arch/arm/boot/dts/rk3188.dtsi +++ b/arch/arm/boot/dts/rk3188.dtsi @@ -630,7 +630,6 @@ &global_timer { interrupts = ; - status = "disabled"; }; &local_timer { diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index 859a7477909f..5edc46a5585c 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -111,6 +111,13 @@ reg = <0x1013c200 0x20>; interrupts = ; clocks = <&cru CORE_PERI>; + status = "disabled"; + /* The clock source and the sched_clock provided by the arm_global_timer + * on Rockchip rk3066a/rk3188 are quite unstable because their rates + * depend on the CPU frequency. + * Keep the arm_global_timer disabled in order to have the + * DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default. 
+ */ }; local_timer: local-timer@1013c600 { From 1331bcfcac181eaa4eaae82055c79791c02103e3 Mon Sep 17 00:00:00 2001 From: GUO Zihua Date: Thu, 17 Nov 2022 17:11:59 +0800 Subject: [PATCH 1202/1477] 9p/fd: Use P9_HDRSZ for header size [ Upstream commit 6854fadbeee10891ed74246bdc05031906b6c8cf ] Cleanup hardcoded header sizes to use P9_HDRSZ instead of '7' Link: https://lkml.kernel.org/r/20221117091159.31533-4-guozihua@huawei.com Signed-off-by: GUO Zihua Reviewed-by: Christian Schoenebeck [Dominique: commit message adjusted to make sense after offset size adjustment got removed] Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin --- net/9p/trans_fd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index deb66635f0f3..e070a0b8e5ca 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -118,7 +118,7 @@ struct p9_conn { struct list_head unsent_req_list; struct p9_req_t *rreq; struct p9_req_t *wreq; - char tmp_buf[7]; + char tmp_buf[P9_HDRSZ]; struct p9_fcall rc; int wpos; int wsize; @@ -291,7 +291,7 @@ static void p9_read_work(struct work_struct *work) if (!m->rc.sdata) { m->rc.sdata = m->tmp_buf; m->rc.offset = 0; - m->rc.capacity = 7; /* start by reading header */ + m->rc.capacity = P9_HDRSZ; /* start by reading header */ } clear_bit(Rpending, &m->wsched); @@ -314,7 +314,7 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ - m->rc.size = 7; + m->rc.size = P9_HDRSZ; err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); if (err) { p9_debug(P9_DEBUG_ERROR, From d38e021416b2853b8efa71e79054b9570234431b Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Fri, 18 Nov 2022 14:10:35 +0100 Subject: [PATCH 1203/1477] regulator: slg51000: Wait after asserting CS pin [ Upstream commit 0b24dfa587c6cc7484cfb170da5c7dd73451f670 ] Sony's downstream driver [1], among some other changes, adds a seemingly random 10ms usleep_range, which turned 
out to be necessary for the hardware to function properly on at least Sony Xperia 1 IV. Without this, I2C transactions with the SLG51000 straight up fail. Relax (10-10ms -> 10-11ms) and add the aforementioned sleep to make sure the hardware has some time to wake up. (nagara-2.0.0-mlc/vendor/semc/hardware/camera-kernel-module/) [1] https://developer.sony.com/file/download/open-source-archive-for-64-0-m-4-29/ Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20221118131035.54874-1-konrad.dybcio@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/slg51000-regulator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c index 75a941fb3c2b..1b2eee95ad3f 100644 --- a/drivers/regulator/slg51000-regulator.c +++ b/drivers/regulator/slg51000-regulator.c @@ -457,6 +457,8 @@ static int slg51000_i2c_probe(struct i2c_client *client) chip->cs_gpiod = cs_gpiod; } + usleep_range(10000, 11000); + i2c_set_clientdata(client, chip); chip->chip_irq = client->irq; chip->dev = dev; From 15c42ab8d43acb73e2eba361ad05822c0af0ecfa Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 18 Nov 2022 15:23:50 -0800 Subject: [PATCH 1204/1477] ALSA: seq: Fix function prototype mismatch in snd_seq_expand_var_event [ Upstream commit 05530ef7cf7c7d700f6753f058999b1b5099a026 ] With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG), indirect call targets are validated against the expected function pointer prototype to make sure the call target is valid to help mitigate ROP attacks. If they are not identical, there is a failure at run time, which manifests as either a kernel panic or thread getting killed. seq_copy_in_user() and seq_copy_in_kernel() did not have prototypes matching snd_seq_dump_func_t. Adjust this and remove the casts. There are not resulting binary output differences. 
This was found as a result of Clang's new -Wcast-function-type-strict flag, which is more sensitive than the simpler -Wcast-function-type, which only checks for type width mismatches. Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202211041527.HD8TLSE1-lkp@intel.com Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: "Gustavo A. R. Silva" Cc: alsa-devel@alsa-project.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221118232346.never.380-kees@kernel.org Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/seq_memory.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index 65db1a7c77b7..bb76a2dd0a2f 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -112,15 +112,19 @@ EXPORT_SYMBOL(snd_seq_dump_var_event); * expand the variable length event to linear buffer space. */ -static int seq_copy_in_kernel(char **bufptr, const void *src, int size) +static int seq_copy_in_kernel(void *ptr, void *src, int size) { + char **bufptr = ptr; + memcpy(*bufptr, src, size); *bufptr += size; return 0; } -static int seq_copy_in_user(char __user **bufptr, const void *src, int size) +static int seq_copy_in_user(void *ptr, void *src, int size) { + char __user **bufptr = ptr; + if (copy_to_user(*bufptr, src, size)) return -EFAULT; *bufptr += size; @@ -149,8 +153,7 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char return newlen; } err = snd_seq_dump_var_event(event, - in_kernel ? (snd_seq_dump_func_t)seq_copy_in_kernel : - (snd_seq_dump_func_t)seq_copy_in_user, + in_kernel ? seq_copy_in_kernel : seq_copy_in_user, &buf); return err < 0 ? 
err : newlen; } From 688a45aff2b25c1af7095944e52556eea8666edc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 15 Nov 2022 16:29:44 +0000 Subject: [PATCH 1205/1477] btrfs: send: avoid unaligned encoded writes when attempting to clone range [ Upstream commit a11452a3709e217492798cf3686ac2cc8eb3fb51 ] When trying to see if we can clone a file range, there are cases where we end up sending two write operations in case the inode from the source root has an i_size that is not sector size aligned and the length from the current offset to its i_size is less than the remaining length we are trying to clone. Issuing two write operations when we could instead issue a single write operation is not incorrect. However it is not optimal, specially if the extents are compressed and the flag BTRFS_SEND_FLAG_COMPRESSED was passed to the send ioctl. In that case we can end up sending an encoded write with an offset that is not sector size aligned, which makes the receiver fallback to decompressing the data and writing it using regular buffered IO (so re-compressing the data in case the fs is mounted with compression enabled), because encoded writes fail with -EINVAL when an offset is not sector size aligned. The following example, which triggered a bug in the receiver code for the fallback logic of decompressing + regular buffer IO and is fixed by the patchset referred in a Link at the bottom of this changelog, is an example where we have the non-optimal behaviour due to an unaligned encoded write: $ cat test.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj mkfs.btrfs -f $DEV > /dev/null mount -o compress $DEV $MNT # File foo has a size of 33K, not aligned to the sector size. xfs_io -f -c "pwrite -S 0xab 0 33K" $MNT/foo xfs_io -f -c "pwrite -S 0xcd 0 64K" $MNT/bar # Now clone the first 32K of file bar into foo at offset 0. xfs_io -c "reflink $MNT/bar 0 0 32K" $MNT/foo # Snapshot the default subvolume and create a full send stream (v2). 
btrfs subvolume snapshot -r $MNT $MNT/snap btrfs send --compressed-data -f /tmp/test.send $MNT/snap echo -e "\nFile bar in the original filesystem:" od -A d -t x1 $MNT/snap/bar umount $MNT mkfs.btrfs -f $DEV > /dev/null mount $DEV $MNT echo -e "\nReceiving stream in a new filesystem..." btrfs receive -f /tmp/test.send $MNT echo -e "\nFile bar in the new filesystem:" od -A d -t x1 $MNT/snap/bar umount $MNT Before this patch, the send stream included one regular write and one encoded write for file 'bar', with the later being not sector size aligned and causing the receiver to fallback to decompression + buffered writes. The output of the btrfs receive command in verbose mode (-vvv): (...) mkfile o258-7-0 rename o258-7-0 -> bar utimes clone bar - source=foo source offset=0 offset=0 length=32768 write bar - offset=32768 length=1024 encoded_write bar - offset=33792, len=4096, unencoded_offset=33792, unencoded_file_len=31744, unencoded_len=65536, compression=1, encryption=0 encoded_write bar - falling back to decompress and write due to errno 22 ("Invalid argument") (...) This patch avoids the regular write followed by an unaligned encoded write so that we end up sending a single encoded write that is aligned. So after this patch the stream content is (output of btrfs receive -vvv): (...) mkfile o258-7-0 rename o258-7-0 -> bar utimes clone bar - source=foo source offset=0 offset=0 length=32768 encoded_write bar - offset=32768, len=4096, unencoded_offset=32768, unencoded_file_len=32768, unencoded_len=65536, compression=1, encryption=0 (...) So we get more optimal behaviour and avoid the silent data loss bug in versions of btrfs-progs affected by the bug referred by the Link tag below (btrfs-progs v5.19, v5.19.1, v6.0 and v6.0.1). 
Link: https://lore.kernel.org/linux-btrfs/cover.1668529099.git.fdmanana@suse.com/ Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/send.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 6b80dee17f49..4a6ba0997e39 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5398,6 +5398,7 @@ static int clone_range(struct send_ctx *sctx, u64 ext_len; u64 clone_len; u64 clone_data_offset; + bool crossed_src_i_size = false; if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(clone_root->root, path); @@ -5454,8 +5455,10 @@ static int clone_range(struct send_ctx *sctx, if (key.offset >= clone_src_i_size) break; - if (key.offset + ext_len > clone_src_i_size) + if (key.offset + ext_len > clone_src_i_size) { ext_len = clone_src_i_size - key.offset; + crossed_src_i_size = true; + } clone_data_offset = btrfs_file_extent_offset(leaf, ei); if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) { @@ -5515,6 +5518,25 @@ static int clone_range(struct send_ctx *sctx, ret = send_clone(sctx, offset, clone_len, clone_root); } + } else if (crossed_src_i_size && clone_len < len) { + /* + * If we are at i_size of the clone source inode and we + * can not clone from it, terminate the loop. This is + * to avoid sending two write operations, one with a + * length matching clone_len and the final one after + * this loop with a length of len - clone_len. + * + * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED + * was passed to the send ioctl), this helps avoid + * sending an encoded write for an offset that is not + * sector size aligned, in case the i_size of the source + * inode is not sector size aligned. 
That will make the + * receiver fallback to decompression of the data and + * writing it using regular buffered IO, therefore while + * not incorrect, it's not optimal due decompression and + * possible re-compression at the receiver. + */ + break; } else { ret = send_extent_data(sctx, offset, clone_len); } From f6f45e538328df9ce66aa61bafee1a5717c4b700 Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Tue, 22 Nov 2022 12:01:13 +0530 Subject: [PATCH 1206/1477] ASoC: soc-pcm: Add NULL check in BE reparenting [ Upstream commit db8f91d424fe0ea6db337aca8bc05908bbce1498 ] Add NULL check in dpcm_be_reparent API, to handle kernel NULL pointer dereference error. The issue occurred in fuzzing test. Signed-off-by: Srinivasa Rao Mandadapu Link: https://lore.kernel.org/r/1669098673-29703-1-git-send-email-quic_srivasam@quicinc.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 0e2261ee07b6..fb874f924bbe 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1154,6 +1154,8 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, return; be_substream = snd_soc_dpcm_get_substream(be, stream); + if (!be_substream) + return; for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) From 8b130c770d00cae3774e356b8a38c741e2240012 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sun, 20 Nov 2022 23:12:08 +0100 Subject: [PATCH 1207/1477] regulator: twl6030: fix get status of twl6032 regulators [ Upstream commit 31a6297b89aabc81b274c093a308a7f5b55081a7 ] Status is reported as always off in the 6032 case. Status reporting now matches the logic in the setters. One of the differences to the 6030 is that there are no groups, therefore the state needs to be read out in the lower bits.
Signed-off-by: Andreas Kemnade Link: https://lore.kernel.org/r/20221120221208.3093727-3-andreas@kemnade.info Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/twl6030-regulator.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c index 7c7e3648ea4b..f3856750944f 100644 --- a/drivers/regulator/twl6030-regulator.c +++ b/drivers/regulator/twl6030-regulator.c @@ -67,6 +67,7 @@ struct twlreg_info { #define TWL6030_CFG_STATE_SLEEP 0x03 #define TWL6030_CFG_STATE_GRP_SHIFT 5 #define TWL6030_CFG_STATE_APP_SHIFT 2 +#define TWL6030_CFG_STATE_MASK 0x03 #define TWL6030_CFG_STATE_APP_MASK (0x03 << TWL6030_CFG_STATE_APP_SHIFT) #define TWL6030_CFG_STATE_APP(v) (((v) & TWL6030_CFG_STATE_APP_MASK) >>\ TWL6030_CFG_STATE_APP_SHIFT) @@ -128,13 +129,14 @@ static int twl6030reg_is_enabled(struct regulator_dev *rdev) if (grp < 0) return grp; grp &= P1_GRP_6030; + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + val = TWL6030_CFG_STATE_APP(val); } else { + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + val &= TWL6030_CFG_STATE_MASK; grp = 1; } - val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); - val = TWL6030_CFG_STATE_APP(val); - return grp && (val == TWL6030_CFG_STATE_ON); } @@ -187,7 +189,12 @@ static int twl6030reg_get_status(struct regulator_dev *rdev) val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); - switch (TWL6030_CFG_STATE_APP(val)) { + if (info->features & TWL6032_SUBCLASS) + val &= TWL6030_CFG_STATE_MASK; + else + val = TWL6030_CFG_STATE_APP(val); + + switch (val) { case TWL6030_CFG_STATE_ON: return REGULATOR_STATUS_NORMAL; From d9b53caf0191cee24afd05ca6c83ed873199b52d Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 18 Nov 2022 00:27:58 +0900 Subject: [PATCH 1208/1477] fbcon: Use kzalloc() in fbcon_prepare_logo() [ Upstream commit a6a00d7e8ffd78d1cdb7a43f1278f081038c638f ] A kernel 
built with syzbot's config file reported that scr_memcpyw(q, save, array3_size(logo_lines, new_cols, 2)) causes uninitialized "save" to be copied. ---------- [drm] Initialized vgem 1.0.0 20120112 for vgem on minor 0 [drm] Initialized vkms 1.0.0 20180514 for vkms on minor 1 Console: switching to colour frame buffer device 128x48 ===================================================== BUG: KMSAN: uninit-value in do_update_region+0x4b8/0xba0 do_update_region+0x4b8/0xba0 update_region+0x40d/0x840 fbcon_switch+0x3364/0x35e0 redraw_screen+0xae3/0x18a0 do_bind_con_driver+0x1cb3/0x1df0 do_take_over_console+0x11cb/0x13f0 fbcon_fb_registered+0xacc/0xfd0 register_framebuffer+0x1179/0x1320 __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40 drm_fbdev_client_hotplug+0xbea/0xda0 drm_fbdev_generic_setup+0x65e/0x9d0 vkms_init+0x9f3/0xc76 (...snipped...) Uninit was stored to memory at: fbcon_prepare_logo+0x143b/0x1940 fbcon_init+0x2c1b/0x31c0 visual_init+0x3e7/0x820 do_bind_con_driver+0x14a4/0x1df0 do_take_over_console+0x11cb/0x13f0 fbcon_fb_registered+0xacc/0xfd0 register_framebuffer+0x1179/0x1320 __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40 drm_fbdev_client_hotplug+0xbea/0xda0 drm_fbdev_generic_setup+0x65e/0x9d0 vkms_init+0x9f3/0xc76 (...snipped...) Uninit was created at: __kmem_cache_alloc_node+0xb69/0x1020 __kmalloc+0x379/0x680 fbcon_prepare_logo+0x704/0x1940 fbcon_init+0x2c1b/0x31c0 visual_init+0x3e7/0x820 do_bind_con_driver+0x14a4/0x1df0 do_take_over_console+0x11cb/0x13f0 fbcon_fb_registered+0xacc/0xfd0 register_framebuffer+0x1179/0x1320 __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40 drm_fbdev_client_hotplug+0xbea/0xda0 drm_fbdev_generic_setup+0x65e/0x9d0 vkms_init+0x9f3/0xc76 (...snipped...) 
CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc4-00356-g8f2975c2bb4c #924 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 ---------- Signed-off-by: Tetsuo Handa Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/cad03d25-0ea0-32c4-8173-fd1895314bce@I-love.SAKURA.ne.jp Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 2618d3beef64..27828435dd4f 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -609,7 +609,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info, if (scr_readw(r) != vc->vc_video_erase_char) break; if (r != q && new_rows >= rows + logo_lines) { - save = kmalloc(array3_size(logo_lines, new_cols, 2), + save = kzalloc(array3_size(logo_lines, new_cols, 2), GFP_KERNEL); if (save) { int i = cols < new_cols ? cols : new_cols; From ec36ebae3667326ab17e544aab9a634cc9d545ea Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 9 Nov 2022 17:58:50 -0800 Subject: [PATCH 1209/1477] usb: dwc3: gadget: Disable GUSB2PHYCFG.SUSPHY for End Transfer [ Upstream commit 3aa07f72894d209fcf922ad686cbb28cf005aaad ] If there's a disconnection while operating in eSS, there may be a delay in VBUS drop response from the connector. In that case, the internal link state may drop to operate in usb2 speed while the controller thinks the VBUS is still high. The driver must make sure to disable GUSB2PHYCFG.SUSPHY when sending endpoint command while in usb2 speed. The End Transfer command may be called, and only that command needs to go through at this point. Let's keep it simple and unconditionally disable GUSB2PHYCFG.SUSPHY whenever we issue the command. This scenario is not seen in real hardware. In a rare case, our prototype type-c controller/interface may have a slow response triggering this issue.
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/5651117207803c26e2f22ddf4e5ce9e865dcf7c7.1668045468.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index a9a43d649478..28a1194f849f 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -291,7 +291,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, * * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2 */ - if (dwc->gadget->speed <= USB_SPEED_HIGH) { + if (dwc->gadget->speed <= USB_SPEED_HIGH || + DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_ENDTRANSFER) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) { saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; From a8c5ffb4dffd30ae663497e1cedfcee0a1b65b71 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Fri, 18 Nov 2022 22:44:41 +0900 Subject: [PATCH 1210/1477] 9p/xen: check logical size for buffer size [ Upstream commit 391c18cf776eb4569ecda1f7794f360fe0a45a26 ] trans_xen did not check the data fits into the buffer before copying from the xen ring, but we probably should. 
Add a check that just skips the request and return an error to userspace if it did not fit Tested-by: Stefano Stabellini Reviewed-by: Christian Schoenebeck Link: https://lkml.kernel.org/r/20221118135542.63400-1-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin --- net/9p/trans_xen.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 432ac5a16f2e..6c8a33f98f09 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -231,6 +231,14 @@ static void p9_xen_response(struct work_struct *work) continue; } + if (h.size > req->rc.capacity) { + dev_warn(&priv->dev->dev, + "requested packet size too big: %d for tag %d with capacity %zd\n", + h.size, h.tag, req->rc.capacity); + req->status = REQ_STATUS_ERROR; + goto recv_error; + } + memcpy(&req->rc, &h, sizeof(h)); req->rc.offset = 0; @@ -240,6 +248,7 @@ static void p9_xen_response(struct work_struct *work) masked_prod, &masked_cons, XEN_9PFS_RING_SIZE(ring)); +recv_error: virt_mb(); cons += h.size; ring->intf->in_cons = cons; From 1c0eec6a1d17349c851cf74ea151f4aff5a2fc0b Mon Sep 17 00:00:00 2001 From: Davide Tronchin Date: Mon, 21 Nov 2022 13:54:55 +0100 Subject: [PATCH 1211/1477] net: usb: qmi_wwan: add u-blox 0x1342 composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a487069e11b6527373f7c6f435d8998051d0b5d9 ] Add RmNet support for LARA-L6. 
LARA-L6 module can be configured (by AT interface) in three different USB modes: * Default mode (Vendor ID: 0x1546 Product ID: 0x1341) with 4 serial interfaces * RmNet mode (Vendor ID: 0x1546 Product ID: 0x1342) with 4 serial interfaces and 1 RmNet virtual network interface * CDC-ECM mode (Vendor ID: 0x1546 Product ID: 0x1343) with 4 serial interfaces and 1 CDC-ECM virtual network interface In RmNet mode LARA-L6 exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parser/alternative functions If 4: RMNET interface Signed-off-by: Davide Tronchin Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 7313e6e03c12..bce151e3706a 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1352,6 +1352,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ + {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ From cdfd3739b2129f1bff2da552877e4a4b28c6e17c Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 6 Dec 2022 18:16:06 +0100 Subject: [PATCH 1212/1477] mm/khugepaged: take the right locks for page table retraction commit 8d3c106e19e8d251da31ff4cc7462e4565d65084 upstream. pagetable walks on address ranges mapped by VMAs can be done under the mmap lock, the lock of an anon_vma attached to the VMA, or the lock of the VMA's address_space. Only one of these needs to be held, and it does not need to be held in exclusive mode.
Under those circumstances, the rules for concurrent access to page table entries are: - Terminal page table entries (entries that don't point to another page table) can be arbitrarily changed under the page table lock, with the exception that they always need to be consistent for hardware page table walks and lockless_pages_from_mm(). This includes that they can be changed into non-terminal entries. - Non-terminal page table entries (which point to another page table) can not be modified; readers are allowed to READ_ONCE() an entry, verify that it is non-terminal, and then assume that its value will stay as-is. Retracting a page table involves modifying a non-terminal entry, so page-table-level locks are insufficient to protect against concurrent page table traversal; it requires taking all the higher-level locks under which it is possible to start a page walk in the relevant range in exclusive mode. The collapse_huge_page() path for anonymous THP already follows this rule, but the shmem/file THP path was getting it wrong, making it possible for concurrent rmap-based operations to cause corruption. 
Link: https://lkml.kernel.org/r/20221129154730.2274278-1-jannh@google.com Link: https://lkml.kernel.org/r/20221128180252.1684965-1-jannh@google.com Link: https://lkml.kernel.org/r/20221125213714.4115729-1-jannh@google.com Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP") Signed-off-by: Jann Horn Reviewed-by: Yang Shi Acked-by: David Hildenbrand Cc: John Hubbard Cc: Peter Xu Cc: Signed-off-by: Andrew Morton [manual backport: this code was refactored from two copies into a common helper between 5.15 and 6.0] Signed-off-by: Jann Horn Signed-off-by: Sasha Levin --- mm/khugepaged.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index cf4dceb9682b..014e8b259313 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1457,6 +1457,14 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE)) return; + /* + * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings + * that got written to. Without this, we'd have to also lock the + * anon_vma if one exists. + */ + if (vma->anon_vma) + return; + hpage = find_lock_page(vma->vm_file->f_mapping, linear_page_index(vma, haddr)); if (!hpage) @@ -1469,6 +1477,19 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) if (!pmd) goto drop_hpage; + /* + * We need to lock the mapping so that from here on, only GUP-fast and + * hardware page walks can access the parts of the page tables that + * we're operating on. + */ + i_mmap_lock_write(vma->vm_file->f_mapping); + + /* + * This spinlock should be unnecessary: Nobody else should be accessing + * the page tables under spinlock protection here, only + * lockless_pages_from_mm() and the hardware page walker can access page + * tables while all the high-level locks are held in write mode. 
+ */ start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); /* step 1: check all mapped PTEs are to the right huge page */ @@ -1515,12 +1536,12 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) } /* step 4: collapse pmd */ - ptl = pmd_lock(vma->vm_mm, pmd); _pmd = pmdp_collapse_flush(vma, haddr, pmd); - spin_unlock(ptl); mm_dec_nr_ptes(mm); pte_free(mm, pmd_pgtable(_pmd)); + i_mmap_unlock_write(vma->vm_file->f_mapping); + drop_hpage: unlock_page(hpage); put_page(hpage); @@ -1528,6 +1549,7 @@ drop_hpage: abort: pte_unmap_unlock(start_pte, ptl); + i_mmap_unlock_write(vma->vm_file->f_mapping); goto drop_hpage; } @@ -1577,7 +1599,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * An alternative would be drop the check, but check that page * table is clear before calling pmdp_collapse_flush() under * ptl. It has higher chance to recover THP for the VMA, but - * has higher cost too. + * has higher cost too. It would also probably require locking + * the anon_vma. */ if (vma->anon_vma) continue; @@ -1599,10 +1622,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) */ if (mmap_write_trylock(mm)) { if (!khugepaged_test_exit(mm)) { - spinlock_t *ptl = pmd_lock(mm, pmd); /* assume page table is clear */ _pmd = pmdp_collapse_flush(vma, addr, pmd); - spin_unlock(ptl); mm_dec_nr_ptes(mm); pte_free(mm, pmd_pgtable(_pmd)); } From 4a1cdb49d0f2e865573d822ada4843f40f00bc8e Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 6 Dec 2022 18:16:04 +0100 Subject: [PATCH 1213/1477] mm/khugepaged: fix GUP-fast interaction by sending IPI commit 2ba99c5e08812494bc57f319fb562f527d9bacd8 upstream. Since commit 70cbc3cc78a99 ("mm: gup: fix the fast GUP race against THP collapse"), the lockless_pages_from_mm() fastpath rechecks the pmd_t to ensure that the page table was not removed by khugepaged in between. However, lockless_pages_from_mm() still requires that the page table is not concurrently freed. 
Fix it by sending IPIs (if the architecture uses semi-RCU-style page table freeing) before freeing/reusing page tables. Link: https://lkml.kernel.org/r/20221129154730.2274278-2-jannh@google.com Link: https://lkml.kernel.org/r/20221128180252.1684965-2-jannh@google.com Link: https://lkml.kernel.org/r/20221125213714.4115729-2-jannh@google.com Fixes: ba76149f47d8 ("thp: khugepaged") Signed-off-by: Jann Horn Reviewed-by: Yang Shi Acked-by: David Hildenbrand Cc: John Hubbard Cc: Peter Xu Cc: Signed-off-by: Andrew Morton [manual backport: two of the three places in khugepaged that can free ptes were refactored into a common helper between 5.15 and 6.0] Signed-off-by: Jann Horn Signed-off-by: Sasha Levin --- include/asm-generic/tlb.h | 4 ++++ mm/khugepaged.c | 3 +++ mm/mmu_gather.c | 4 +--- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index a0c4b99d2899..f40c9534f20b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -205,12 +205,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #define tlb_needs_table_invalidate() (true) #endif +void tlb_remove_table_sync_one(void); + #else #ifdef tlb_needs_table_invalidate #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE #endif +static inline void tlb_remove_table_sync_one(void) { } + #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 014e8b259313..0268b549bd60 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1154,6 +1154,7 @@ static void collapse_huge_page(struct mm_struct *mm, _pmd = pmdp_collapse_flush(vma, address, pmd); spin_unlock(pmd_ptl); mmu_notifier_invalidate_range_end(&range); + tlb_remove_table_sync_one(); spin_lock(pte_ptl); isolated = __collapse_huge_page_isolate(vma, address, pte, @@ -1538,6 +1539,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) /* step 4: collapse pmd */ _pmd = pmdp_collapse_flush(vma, 
haddr, pmd); mm_dec_nr_ptes(mm); + tlb_remove_table_sync_one(); pte_free(mm, pmd_pgtable(_pmd)); i_mmap_unlock_write(vma->vm_file->f_mapping); @@ -1625,6 +1627,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) /* assume page table is clear */ _pmd = pmdp_collapse_flush(vma, addr, pmd); mm_dec_nr_ptes(mm); + tlb_remove_table_sync_one(); pte_free(mm, pmd_pgtable(_pmd)); } mmap_write_unlock(mm); diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 03c33c93a582..205fdbb5792a 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -139,7 +139,7 @@ static void tlb_remove_table_smp_sync(void *arg) /* Simply deliver the interrupt */ } -static void tlb_remove_table_sync_one(void) +void tlb_remove_table_sync_one(void) { /* * This isn't an RCU grace period and hence the page-tables cannot be @@ -163,8 +163,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch) #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ -static void tlb_remove_table_sync_one(void) { } - static void tlb_remove_table_free(struct mmu_table_batch *batch) { __tlb_remove_table_free(batch); From 7f445ca2e0e59c7971d0b7b853465e50844ab596 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 6 Dec 2022 18:16:05 +0100 Subject: [PATCH 1214/1477] mm/khugepaged: invoke MMU notifiers in shmem/file collapse paths commit f268f6cf875f3220afc77bdd0bf1bb136eb54db9 upstream. Any codepath that zaps page table entries must invoke MMU notifiers to ensure that secondary MMUs (like KVM) don't keep accessing pages which aren't mapped anymore. Secondary MMUs don't hold their own references to pages that are mirrored over, so failing to notify them can lead to page use-after-free. 
I'm marking this as addressing an issue introduced in commit f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages"), but most of the security impact of this only came in commit 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP"), which actually omitted flushes for the removal of present PTEs, not just for the removal of empty page tables. Link: https://lkml.kernel.org/r/20221129154730.2274278-3-jannh@google.com Link: https://lkml.kernel.org/r/20221128180252.1684965-3-jannh@google.com Link: https://lkml.kernel.org/r/20221125213714.4115729-3-jannh@google.com Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Jann Horn Acked-by: David Hildenbrand Reviewed-by: Yang Shi Cc: John Hubbard Cc: Peter Xu Cc: Signed-off-by: Andrew Morton [manual backport: this code was refactored from two copies into a common helper between 5.15 and 6.0] Signed-off-by: Jann Horn Signed-off-by: Sasha Levin --- mm/khugepaged.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 0268b549bd60..0eb3adf4ff68 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1444,6 +1444,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) spinlock_t *ptl; int count = 0; int i; + struct mmu_notifier_range range; if (!vma || !vma->vm_file || vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE) @@ -1537,9 +1538,13 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) } /* step 4: collapse pmd */ + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, haddr, + haddr + HPAGE_PMD_SIZE); + mmu_notifier_invalidate_range_start(&range); _pmd = pmdp_collapse_flush(vma, haddr, pmd); mm_dec_nr_ptes(mm); tlb_remove_table_sync_one(); + mmu_notifier_invalidate_range_end(&range); pte_free(mm, pmd_pgtable(_pmd)); i_mmap_unlock_write(vma->vm_file->f_mapping); @@ -1624,11 +1629,19 @@ static void retract_page_tables(struct address_space 
*mapping, pgoff_t pgoff) */ if (mmap_write_trylock(mm)) { if (!khugepaged_test_exit(mm)) { + struct mmu_notifier_range range; + + mmu_notifier_range_init(&range, + MMU_NOTIFY_CLEAR, 0, + NULL, mm, addr, + addr + HPAGE_PMD_SIZE); + mmu_notifier_invalidate_range_start(&range); /* assume page table is clear */ _pmd = pmdp_collapse_flush(vma, addr, pmd); mm_dec_nr_ptes(mm); tlb_remove_table_sync_one(); pte_free(mm, pmd_pgtable(_pmd)); + mmu_notifier_invalidate_range_end(&range); } mmap_write_unlock(mm); } else { From 7c7075c88da4e108deb80257a3a5352ddf22ba4c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:46:14 +0100 Subject: [PATCH 1215/1477] rtc: mc146818: Prevent reading garbage [ Upstream commit 05a0302c35481e9b47fb90ba40922b0a4cae40d8 ] The MC146818 driver is prone to read garbage from the RTC. There are several issues all related to the update cycle of the MC146818. The chip increments seconds obviously once per second and indicates that by a bit in a register. The bit goes high 244us before the actual update starts. During the update the readout of the time values is undefined. The code just checks whether the update in progress bit (UIP) is set before reading the clock. If it's set it waits arbitrary 20ms before retrying, which is ample because the maximum update time is ~2ms. But this check does not guarantee that the UIP bit goes high and the actual update happens during the readout. So the following can happen 0.997 UIP = False -> Interrupt/NMI/preemption 0.998 UIP -> True 0.999 Readout <- Undefined To prevent this rework the code so it checks UIP before and after the readout and if set after the readout try again. But that's not enough to cover the following: 0.997 UIP = False Readout seconds -> NMI (or vCPU scheduled out) 0.998 UIP -> True update completes UIP -> False 1.000 Readout minutes,.... UIP check succeeds That can make the readout wrong up to 59 seconds. 
To prevent this, read the seconds value before the first UIP check, validate it after checking UIP and after reading out the rest. It's amazing that the original i386 code had this actually correct and the generic implementation of the MC146818 driver got it wrong in 2002 and it stayed that way until today. Signed-off-by: Thomas Gleixner Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201206220541.594826678@linutronix.de Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") Signed-off-by: Sasha Levin --- drivers/rtc/rtc-mc146818-lib.c | 64 +++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index b036ff33fbe6..8364e4141670 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -8,41 +8,41 @@ #include #endif -/* - * Returns true if a clock update is in progress - */ -static inline unsigned char mc146818_is_updating(void) -{ - unsigned char uip; - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); - spin_unlock_irqrestore(&rtc_lock, flags); - return uip; -} - unsigned int mc146818_get_time(struct rtc_time *time) { unsigned char ctrl; unsigned long flags; unsigned char century = 0; + bool retry; #ifdef CONFIG_MACH_DECSTATION unsigned int real_year; #endif +again: + spin_lock_irqsave(&rtc_lock, flags); /* - * read RTC once any update in progress is done. The update - * can take just over 2ms. We wait 20ms. There is no need to - * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP. - * If you need to know *exactly* when a second has started, enable - * periodic update complete interrupts, (via ioctl) and then - * immediately read /dev/rtc which will block until you get the IRQ. - * Once the read clears, read the RTC time (again via ioctl). Easy. 
+ * Check whether there is an update in progress during which the + * readout is unspecified. The maximum update time is ~2ms. Poll + * every msec for completion. + * + * Store the second value before checking UIP so a long lasting NMI + * which happens to hit after the UIP check cannot make an update + * cycle invisible. */ - if (mc146818_is_updating()) - mdelay(20); + time->tm_sec = CMOS_READ(RTC_SECONDS); + + if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { + spin_unlock_irqrestore(&rtc_lock, flags); + mdelay(1); + goto again; + } + + /* Revalidate the above readout */ + if (time->tm_sec != CMOS_READ(RTC_SECONDS)) { + spin_unlock_irqrestore(&rtc_lock, flags); + goto again; + } /* * Only the values that we read from the RTC are set. We leave @@ -50,8 +50,6 @@ unsigned int mc146818_get_time(struct rtc_time *time) * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated * by the RTC when initially set to a non-zero value. */ - spin_lock_irqsave(&rtc_lock, flags); - time->tm_sec = CMOS_READ(RTC_SECONDS); time->tm_min = CMOS_READ(RTC_MINUTES); time->tm_hour = CMOS_READ(RTC_HOURS); time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH); @@ -66,8 +64,24 @@ unsigned int mc146818_get_time(struct rtc_time *time) century = CMOS_READ(acpi_gbl_FADT.century); #endif ctrl = CMOS_READ(RTC_CONTROL); + /* + * Check for the UIP bit again. If it is set now then + * the above values may contain garbage. + */ + retry = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP; + /* + * A NMI might have interrupted the above sequence so check whether + * the seconds value has changed which indicates that the NMI took + * longer than the UIP bit was set. Unlikely, but possible and + * there is also virt... 
+ */ + retry |= time->tm_sec != CMOS_READ(RTC_SECONDS); + spin_unlock_irqrestore(&rtc_lock, flags); + if (retry) + goto again; + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { time->tm_sec = bcd2bin(time->tm_sec); From 3736972360fad22805e7e9d322387fd1ec0f48fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Jan 2021 18:02:11 +0100 Subject: [PATCH 1216/1477] rtc: mc146818: Detect and handle broken RTCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 211e5db19d15a721b2953ea54b8f26c2963720eb ] The recent fix for handling the UIP bit unearthed another issue in the RTC code. If the RTC is advertised but the readout is straight 0xFF because it's not available, the old code just proceeded with crappy values, but the new code hangs because it waits for the UIP bit to become low. Add a sanity check in the RTC CMOS probe function which reads the RTC_VALID register (Register D) which should have bit 0-6 cleared. If that's not the case then fail to register the CMOS. Add the same check to mc146818_get_time(), warn once when the condition is true and invalidate the rtc_time data. Reported-by: Mickaël Salaün Signed-off-by: Thomas Gleixner Tested-by: Mickaël Salaün Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/87tur3fx7w.fsf@nanos.tec.linutronix.de Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") Signed-off-by: Sasha Levin --- drivers/rtc/rtc-cmos.c | 8 ++++++++ drivers/rtc/rtc-mc146818-lib.c | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 58c6382a2807..cce4b62ffdd0 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -808,6 +808,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) spin_lock_irq(&rtc_lock); + /* Ensure that the RTC is accessible. Bit 0-6 must be 0! 
*/ + if ((CMOS_READ(RTC_VALID) & 0x7f) != 0) { + spin_unlock_irq(&rtc_lock); + dev_warn(dev, "not accessible\n"); + retval = -ENXIO; + goto cleanup1; + } + if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) { /* force periodic irq to CMOS reset default of 1024Hz; * diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 8364e4141670..7f01dc41271d 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -21,6 +21,13 @@ unsigned int mc146818_get_time(struct rtc_time *time) again: spin_lock_irqsave(&rtc_lock, flags); + /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */ + if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x7f) != 0)) { + spin_unlock_irqrestore(&rtc_lock, flags); + memset(time, 0xff, sizeof(*time)); + return 0; + } + /* * Check whether there is an update in progress during which the * readout is unspecified. The maximum update time is ~2ms. Poll From f5b51f855036e39456ac36aadc7078ac583c5932 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 1 Feb 2021 20:24:17 +0100 Subject: [PATCH 1217/1477] rtc: mc146818: Dont test for bit 0-5 in Register D [ Upstream commit ebb22a05943666155e6da04407cc6e913974c78c ] The recent change to validate the RTC turned out to be overly tight. While it cures the problem on the reporters machine it breaks machines with Intel chipsets which use bit 0-5 of the D register. So check only for bit 6 being 0 which is the case on these Intel machines as well. 
Fixes: 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs") Reported-by: Serge Belyshev Reported-by: Dirk Gouders Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Dirk Gouders Tested-by: Len Brown Tested-by: Borislav Petkov Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/87zh0nbnha.fsf@nanos.tec.linutronix.de Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") Signed-off-by: Sasha Levin --- drivers/rtc/rtc-cmos.c | 4 ++-- drivers/rtc/rtc-mc146818-lib.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index cce4b62ffdd0..8e8ce40f6440 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -808,8 +808,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) spin_lock_irq(&rtc_lock); - /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */ - if ((CMOS_READ(RTC_VALID) & 0x7f) != 0) { + /* Ensure that the RTC is accessible. Bit 6 must be 0! */ + if ((CMOS_READ(RTC_VALID) & 0x40) != 0) { spin_unlock_irq(&rtc_lock); dev_warn(dev, "not accessible\n"); retval = -ENXIO; diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 7f01dc41271d..6ed2cd5d2bba 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -21,8 +21,8 @@ unsigned int mc146818_get_time(struct rtc_time *time) again: spin_lock_irqsave(&rtc_lock, flags); - /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */ - if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x7f) != 0)) { + /* Ensure that the RTC is accessible. Bit 6 must be 0! 
*/ + if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) { spin_unlock_irqrestore(&rtc_lock, flags); memset(time, 0xff, sizeof(*time)); return 0; From 94eaf9966e04b62e879983eb1f883650dedba69d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Fri, 16 Jul 2021 23:04:37 +0200 Subject: [PATCH 1218/1477] rtc: cmos: remove stale REVISIT comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e1aba37569f0aa9c993f740828871e48eea79f98 ] It appears mc146818_get_time() and mc146818_set_time() now correctly use the century register as specified in the ACPI FADT table. It is not clear what else could be done here. These comments were introduced by commit 7be2c7c96aff ("[PATCH] RTC framework driver for CMOS RTCs") in 2007, which originally referenced function get_rtc_time() in include/asm-generic/rtc.h . Signed-off-by: Mateusz Jończyk Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210716210437.29622-1-mat.jonczyk@o2.pl Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") Signed-off-by: Sasha Levin --- drivers/rtc/rtc-cmos.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 8e8ce40f6440..ed4f512eabf0 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -229,19 +229,13 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t) if (!pm_trace_rtc_valid()) return -EIO; - /* REVISIT: if the clock has a "century" register, use - * that instead of the heuristic in mc146818_get_time(). - * That'll make Y3K compatility (year > 2070) easy! 
- */ mc146818_get_time(t); return 0; } static int cmos_set_time(struct device *dev, struct rtc_time *t) { - /* REVISIT: set the "century" register if available - * - * NOTE: this ignores the issue whereby updating the seconds + /* NOTE: this ignores the issue whereby updating the seconds * takes effect exactly 500ms after we write the register. * (Also queueing and other delays before we get this far.) */ From b9a5c470e075583c270628bf49e846e08ce910d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Fri, 10 Dec 2021 21:01:24 +0100 Subject: [PATCH 1219/1477] rtc: mc146818-lib: change return values of mc146818_get_time() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d35786b3a28dee20b12962ae2dd365892a99ed1a ] No function is checking mc146818_get_time() return values yet, so correct them to make them more customary. Signed-off-by: Mateusz Jończyk Cc: Alessandro Zummo Cc: Alexandre Belloni Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211210200131.153887-3-mat.jonczyk@o2.pl Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") Signed-off-by: Sasha Levin --- drivers/rtc/rtc-mc146818-lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 6ed2cd5d2bba..6262f0680f13 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -25,7 +25,7 @@ again: if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) { spin_unlock_irqrestore(&rtc_lock, flags); memset(time, 0xff, sizeof(*time)); - return 0; + return -EIO; } /* @@ -116,7 +116,7 @@ again: time->tm_mon--; - return RTC_24H; + return 0; } EXPORT_SYMBOL_GPL(mc146818_get_time); From 775d4661f1455a623f5008a2b53b350df07da3a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Fri, 10 Dec 2021 21:01:25 +0100 Subject: [PATCH 1220/1477] rtc: Check return value from 
mc146818_get_time() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0dd8d6cb9eddfe637bcd821bbfd40ebd5a0737b9 ] There are 4 users of mc146818_get_time() and none of them was checking the return value from this function. Change this. Print the appropriate warnings in callers of mc146818_get_time() instead of in the function mc146818_get_time() itself, in order not to add strings to rtc-mc146818-lib.c, which is kind of a library. The callers of alpha_rtc_read_time() and cmos_read_time() may use the contents of (struct rtc_time *) even when the functions return a failure code. Therefore, set the contents of (struct rtc_time *) to 0x00, which looks more sensible than 0xff and aligns with the (possibly stale?) comment in cmos_read_time: /* * If pm_trace abused the RTC for storage, set the timespec to 0, * which tells the caller that this RTC value is unusable. */ For consistency, do this in mc146818_get_time(). Note: hpet_rtc_interrupt() may call mc146818_get_time() many times a second. It is very unlikely, though, that the RTC suddenly stops working and mc146818_get_time() would consistently fail. Only compile-tested on alpha.
Signed-off-by: Mateusz Jończyk Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: linux-alpha@vger.kernel.org Cc: x86@kernel.org Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211210200131.153887-4-mat.jonczyk@o2.pl Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") Signed-off-by: Sasha Levin --- arch/alpha/kernel/rtc.c | 7 ++++++- arch/x86/kernel/hpet.c | 8 ++++++-- drivers/base/power/trace.c | 6 +++++- drivers/rtc/rtc-cmos.c | 9 ++++++++- drivers/rtc/rtc-mc146818-lib.c | 2 +- 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c index 1b1d5963ac55..48ffbfbd0624 100644 --- a/arch/alpha/kernel/rtc.c +++ b/arch/alpha/kernel/rtc.c @@ -80,7 +80,12 @@ init_rtc_epoch(void) static int alpha_rtc_read_time(struct device *dev, struct rtc_time *tm) { - mc146818_get_time(tm); + int ret = mc146818_get_time(tm); + + if (ret < 0) { + dev_err_ratelimited(dev, "unable to read current time\n"); + return ret; + } /* Adjust for non-default epochs. 
It's easier to depend on the generic __get_rtc_time and adjust the epoch here than create diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 4ab7a9757e52..574df24a8e5a 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1325,8 +1325,12 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) hpet_rtc_timer_reinit(); memset(&curr_time, 0, sizeof(struct rtc_time)); - if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) - mc146818_get_time(&curr_time); + if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) { + if (unlikely(mc146818_get_time(&curr_time) < 0)) { + pr_err_ratelimited("unable to read current time from RTC\n"); + return IRQ_HANDLED; + } + } if (hpet_rtc_flags & RTC_UIE && curr_time.tm_sec != hpet_prev_update_sec) { diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 94665037f4a3..72b7a92337b1 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -120,7 +120,11 @@ static unsigned int read_magic_time(void) struct rtc_time time; unsigned int val; - mc146818_get_time(&time); + if (mc146818_get_time(&time) < 0) { + pr_err("Unable to read current time from RTC\n"); + return 0; + } + pr_info("RTC time: %ptRt, date: %ptRd\n", &time, &time); val = time.tm_year; /* 100 years */ if (val > 100) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index ed4f512eabf0..f8358bb2ae31 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -222,6 +222,8 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr) static int cmos_read_time(struct device *dev, struct rtc_time *t) { + int ret; + /* * If pm_trace abused the RTC for storage, set the timespec to 0, * which tells the caller that this RTC value is unusable. 
@@ -229,7 +231,12 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t) if (!pm_trace_rtc_valid()) return -EIO; - mc146818_get_time(t); + ret = mc146818_get_time(t); + if (ret < 0) { + dev_err_ratelimited(dev, "unable to read current time\n"); + return ret; + } + return 0; } diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 6262f0680f13..3ae5c690f22b 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -24,7 +24,7 @@ again: /* Ensure that the RTC is accessible. Bit 6 must be 0! */ if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) { spin_unlock_irqrestore(&rtc_lock, flags); - memset(time, 0xff, sizeof(*time)); + memset(time, 0, sizeof(*time)); return -EIO; } From 8bb5fe58305ff55e02af368327530aece5ebcb71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Fri, 10 Dec 2021 21:01:26 +0100 Subject: [PATCH 1221/1477] rtc: mc146818-lib: fix RTC presence check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ea6fa4961aab8f90a8aa03575a98b4bda368d4b6 ] To prevent an infinite loop in mc146818_get_time(), commit 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs") added a check for RTC availability. Together with a later fix, it checked if bit 6 in register 0x0d is cleared. This, however, caused a false negative on a motherboard with an AMD SB710 southbridge; according to the specification [1], bit 6 of register 0x0d of this chipset is a scratchbit. This caused a regression in Linux 5.11 - the RTC was determined broken by the kernel and not used by rtc-cmos.c [3]. This problem was also reported in Fedora [4]. As a better alternative, check whether the UIP ("Update-in-progress") bit is set for longer than 10ms. If that is the case, then apparently the RTC is either absent (and all register reads return 0xff) or broken.
Also limit the number of loop iterations in mc146818_get_time() to 10 to prevent an infinite loop there. The functions mc146818_get_time() and mc146818_does_rtc_work() will be refactored later in this patch series, in order to fix a separate problem with reading / setting the RTC alarm time. This is done to avoid confusion about what is being fixed when. In a previous approach to this problem, I implemented a check whether the RTC_HOURS register contains a value <= 24. This, however, sometimes did not work correctly on my Intel Kaby Lake laptop. According to Intel's documentation [2], "the time and date RAM locations (0-9) are disconnected from the external bus" during the update cycle so reading this register without checking the UIP bit is incorrect. [1] AMD SB700/710/750 Register Reference Guide, page 308, https://developer.amd.com/wordpress/media/2012/10/43009_sb7xx_rrg_pub_1.00.pdf [2] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...] Datasheet Volume 1 of 2, page 209 Intel's Document Number: 334658-006, https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf [3] Functions in arch/x86/kernel/rtc.c apparently were using it.
[4] https://bugzilla.redhat.com/show_bug.cgi?id=1936688 Fixes: 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs") Fixes: ebb22a059436 ("rtc: mc146818: Dont test for bit 0-5 in Register D") Signed-off-by: Mateusz Jończyk Cc: Thomas Gleixner Cc: Alessandro Zummo Cc: Alexandre Belloni Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211210200131.153887-5-mat.jonczyk@o2.pl Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") Signed-off-by: Sasha Levin --- drivers/rtc/rtc-cmos.c | 10 ++++------ drivers/rtc/rtc-mc146818-lib.c | 34 ++++++++++++++++++++++++++++++---- include/linux/mc146818rtc.h | 1 + 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index f8358bb2ae31..93ffb9eaf63a 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -807,16 +807,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) rename_region(ports, dev_name(&cmos_rtc.rtc->dev)); - spin_lock_irq(&rtc_lock); - - /* Ensure that the RTC is accessible. Bit 6 must be 0! */ - if ((CMOS_READ(RTC_VALID) & 0x40) != 0) { - spin_unlock_irq(&rtc_lock); - dev_warn(dev, "not accessible\n"); + if (!mc146818_does_rtc_work()) { + dev_warn(dev, "broken or not accessible\n"); retval = -ENXIO; goto cleanup1; } + spin_lock_irq(&rtc_lock); + if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) { /* force periodic irq to CMOS reset default of 1024Hz; * diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 3ae5c690f22b..94df6056c5c0 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -8,10 +8,36 @@ #include #endif +/* + * If the UIP (Update-in-progress) bit of the RTC is set for more then + * 10ms, the RTC is apparently broken or not present. 
+ */ +bool mc146818_does_rtc_work(void) +{ + int i; + unsigned char val; + unsigned long flags; + + for (i = 0; i < 10; i++) { + spin_lock_irqsave(&rtc_lock, flags); + val = CMOS_READ(RTC_FREQ_SELECT); + spin_unlock_irqrestore(&rtc_lock, flags); + + if ((val & RTC_UIP) == 0) + return true; + + mdelay(1); + } + + return false; +} +EXPORT_SYMBOL_GPL(mc146818_does_rtc_work); + unsigned int mc146818_get_time(struct rtc_time *time) { unsigned char ctrl; unsigned long flags; + unsigned int iter_count = 0; unsigned char century = 0; bool retry; @@ -20,13 +46,13 @@ unsigned int mc146818_get_time(struct rtc_time *time) #endif again: - spin_lock_irqsave(&rtc_lock, flags); - /* Ensure that the RTC is accessible. Bit 6 must be 0! */ - if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) { - spin_unlock_irqrestore(&rtc_lock, flags); + if (iter_count > 10) { memset(time, 0, sizeof(*time)); return -EIO; } + iter_count++; + + spin_lock_irqsave(&rtc_lock, flags); /* * Check whether there is an update in progress during which the diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index 1e0205811394..c246ce191915 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -125,6 +125,7 @@ struct cmos_rtc_board_info { #define RTC_IO_EXTENT_USED RTC_IO_EXTENT #endif /* ARCH_RTC_LOCATION */ +bool mc146818_does_rtc_work(void); unsigned int mc146818_get_time(struct rtc_time *time); int mc146818_set_time(struct rtc_time *time); From 33ac73a41af6ee7ab88762e09497e3d56dde6d52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Fri, 10 Dec 2021 21:01:27 +0100 Subject: [PATCH 1222/1477] rtc: mc146818-lib: extract mc146818_avoid_UIP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ec5895c0f2d87b9bf4185db1915e40fa6fcfc0ac ] Function mc146818_get_time() contains an elaborate mechanism of reading the RTC time while no RTC update is in progress. 
It turns out that reading the RTC alarm clock also requires avoiding the RTC update. Therefore, the mechanism in mc146818_get_time() should be reused - so extract it into a separate function. The logic in mc146818_avoid_UIP() is same as in mc146818_get_time() except that after every if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { there is now "mdelay(1)". To avoid producing a very unreadable patch, mc146818_get_time() will be refactored to use mc146818_avoid_UIP() in the next patch. Signed-off-by: Mateusz Jończyk Cc: Alessandro Zummo Cc: Alexandre Belloni Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211210200131.153887-6-mat.jonczyk@o2.pl Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") Signed-off-by: Sasha Levin --- drivers/rtc/rtc-mc146818-lib.c | 70 ++++++++++++++++++++++++++++++++++ include/linux/mc146818rtc.h | 3 ++ 2 files changed, 73 insertions(+) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 94df6056c5c0..46527a5d3912 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -8,6 +8,76 @@ #include #endif +/* + * Execute a function while the UIP (Update-in-progress) bit of the RTC is + * unset. + * + * Warning: callback may be executed more then once. + */ +bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + void *param) +{ + int i; + unsigned long flags; + unsigned char seconds; + + for (i = 0; i < 10; i++) { + spin_lock_irqsave(&rtc_lock, flags); + + /* + * Check whether there is an update in progress during which the + * readout is unspecified. The maximum update time is ~2ms. Poll + * every msec for completion. + * + * Store the second value before checking UIP so a long lasting + * NMI which happens to hit after the UIP check cannot make + * an update cycle invisible. 
+ */ + seconds = CMOS_READ(RTC_SECONDS); + + if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { + spin_unlock_irqrestore(&rtc_lock, flags); + mdelay(1); + continue; + } + + /* Revalidate the above readout */ + if (seconds != CMOS_READ(RTC_SECONDS)) { + spin_unlock_irqrestore(&rtc_lock, flags); + continue; + } + + if (callback) + callback(seconds, param); + + /* + * Check for the UIP bit again. If it is set now then + * the above values may contain garbage. + */ + if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { + spin_unlock_irqrestore(&rtc_lock, flags); + mdelay(1); + continue; + } + + /* + * A NMI might have interrupted the above sequence so check + * whether the seconds value has changed which indicates that + * the NMI took longer than the UIP bit was set. Unlikely, but + * possible and there is also virt... + */ + if (seconds != CMOS_READ(RTC_SECONDS)) { + spin_unlock_irqrestore(&rtc_lock, flags); + continue; + } + spin_unlock_irqrestore(&rtc_lock, flags); + + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(mc146818_avoid_UIP); + /* * If the UIP (Update-in-progress) bit of the RTC is set for more then * 10ms, the RTC is apparently broken or not present. 
diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index c246ce191915..fb042e0e7d76 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -129,4 +129,7 @@ bool mc146818_does_rtc_work(void); unsigned int mc146818_get_time(struct rtc_time *time); int mc146818_set_time(struct rtc_time *time); +bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + void *param); + #endif /* _MC146818RTC_H */ From 949bae02827e296e59d0805386151af8b75ef9b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Fri, 10 Dec 2021 21:01:31 +0100 Subject: [PATCH 1223/1477] rtc: cmos: avoid UIP when writing alarm time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cd17420ebea580c22dd3a93f7237de3d2cfafc37 ] Some Intel chipsets disconnect the time and date RTC registers when the clock update is in progress: during this time reads may return bogus values and writes fail silently. This includes the RTC alarm registers. [1] cmos_set_alarm() did not take account for that, fix it. [1] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...] Datasheet, Volume 1 of 2 (Intel's Document Number: 334658-006) Page 208 https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf "If a RAM read from the ten time and date bytes is attempted during an update cycle, the value read do not necessarily represent the true contents of those locations. Any RAM writes under the same conditions are ignored." 
Signed-off-by: Mateusz Jończyk Cc: Alessandro Zummo Cc: Alexandre Belloni Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211210200131.153887-10-mat.jonczyk@o2.pl Signed-off-by: Sasha Levin --- drivers/rtc/rtc-cmos.c | 107 +++++++++++++++++++++++++---------------- 1 file changed, 66 insertions(+), 41 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 93ffb9eaf63a..601e3967e1f0 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -444,10 +444,57 @@ static int cmos_validate_alarm(struct device *dev, struct rtc_wkalrm *t) return 0; } +struct cmos_set_alarm_callback_param { + struct cmos_rtc *cmos; + unsigned char mon, mday, hrs, min, sec; + struct rtc_wkalrm *t; +}; + +/* Note: this function may be executed by mc146818_avoid_UIP() more then + * once + */ +static void cmos_set_alarm_callback(unsigned char __always_unused seconds, + void *param_in) +{ + struct cmos_set_alarm_callback_param *p = + (struct cmos_set_alarm_callback_param *)param_in; + + /* next rtc irq must not be from previous alarm setting */ + cmos_irq_disable(p->cmos, RTC_AIE); + + /* update alarm */ + CMOS_WRITE(p->hrs, RTC_HOURS_ALARM); + CMOS_WRITE(p->min, RTC_MINUTES_ALARM); + CMOS_WRITE(p->sec, RTC_SECONDS_ALARM); + + /* the system may support an "enhanced" alarm */ + if (p->cmos->day_alrm) { + CMOS_WRITE(p->mday, p->cmos->day_alrm); + if (p->cmos->mon_alrm) + CMOS_WRITE(p->mon, p->cmos->mon_alrm); + } + + if (use_hpet_alarm()) { + /* + * FIXME the HPET alarm glue currently ignores day_alrm + * and mon_alrm ... 
+ */ + hpet_set_alarm_time(p->t->time.tm_hour, p->t->time.tm_min, + p->t->time.tm_sec); + } + + if (p->t->enabled) + cmos_irq_enable(p->cmos, RTC_AIE); +} + static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) { struct cmos_rtc *cmos = dev_get_drvdata(dev); - unsigned char mon, mday, hrs, min, sec, rtc_control; + struct cmos_set_alarm_callback_param p = { + .cmos = cmos, + .t = t + }; + unsigned char rtc_control; int ret; /* This not only a rtc_op, but also called directly */ @@ -458,11 +505,11 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) if (ret < 0) return ret; - mon = t->time.tm_mon + 1; - mday = t->time.tm_mday; - hrs = t->time.tm_hour; - min = t->time.tm_min; - sec = t->time.tm_sec; + p.mon = t->time.tm_mon + 1; + p.mday = t->time.tm_mday; + p.hrs = t->time.tm_hour; + p.min = t->time.tm_min; + p.sec = t->time.tm_sec; spin_lock_irq(&rtc_lock); rtc_control = CMOS_READ(RTC_CONTROL); @@ -470,43 +517,21 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { /* Writing 0xff means "don't care" or "match all". */ - mon = (mon <= 12) ? bin2bcd(mon) : 0xff; - mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff; - hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff; - min = (min < 60) ? bin2bcd(min) : 0xff; - sec = (sec < 60) ? bin2bcd(sec) : 0xff; + p.mon = (p.mon <= 12) ? bin2bcd(p.mon) : 0xff; + p.mday = (p.mday >= 1 && p.mday <= 31) ? bin2bcd(p.mday) : 0xff; + p.hrs = (p.hrs < 24) ? bin2bcd(p.hrs) : 0xff; + p.min = (p.min < 60) ? bin2bcd(p.min) : 0xff; + p.sec = (p.sec < 60) ? 
bin2bcd(p.sec) : 0xff; } - spin_lock_irq(&rtc_lock); - - /* next rtc irq must not be from previous alarm setting */ - cmos_irq_disable(cmos, RTC_AIE); - - /* update alarm */ - CMOS_WRITE(hrs, RTC_HOURS_ALARM); - CMOS_WRITE(min, RTC_MINUTES_ALARM); - CMOS_WRITE(sec, RTC_SECONDS_ALARM); - - /* the system may support an "enhanced" alarm */ - if (cmos->day_alrm) { - CMOS_WRITE(mday, cmos->day_alrm); - if (cmos->mon_alrm) - CMOS_WRITE(mon, cmos->mon_alrm); - } - - if (use_hpet_alarm()) { - /* - * FIXME the HPET alarm glue currently ignores day_alrm - * and mon_alrm ... - */ - hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min, - t->time.tm_sec); - } - - if (t->enabled) - cmos_irq_enable(cmos, RTC_AIE); - - spin_unlock_irq(&rtc_lock); + /* + * Some Intel chipsets disconnect the alarm registers when the clock + * update is in progress - during this time writes fail silently. + * + * Use mc146818_avoid_UIP() to avoid this. + */ + if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p)) + return -EIO; cmos->alarm_expires = rtc_tm_to_time64(&t->time); From acfd8ef683fbf5261be8ed5075e00b2a45e152b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Fri, 10 Dec 2021 21:01:30 +0100 Subject: [PATCH 1224/1477] rtc: cmos: avoid UIP when reading alarm time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cdedc45c579faf8cc6608d3ef81576ee0d512aa4 ] Some Intel chipsets disconnect the time and date RTC registers when the clock update is in progress: during this time reads may return bogus values and writes fail silently. This includes the RTC alarm registers. [1] cmos_read_alarm() did not take account for that, which caused alarm time reads to sometimes return bogus values. This can be shown with a test patch that I am attaching to this patch series. Fix this, by using mc146818_avoid_UIP(). [1] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...] 
Datasheet, Volume 1 of 2 (Intel's Document Number: 334658-006) Page 208 https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf "If a RAM read from the ten time and date bytes is attempted during an update cycle, the value read do not necessarily represent the true contents of those locations. Any RAM writes under the same conditions are ignored." Signed-off-by: Mateusz Jończyk Cc: Alessandro Zummo Cc: Alexandre Belloni Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211210200131.153887-9-mat.jonczyk@o2.pl Signed-off-by: Sasha Levin --- drivers/rtc/rtc-cmos.c | 72 ++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 601e3967e1f0..d419eb988b22 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -249,10 +249,46 @@ static int cmos_set_time(struct device *dev, struct rtc_time *t) return mc146818_set_time(t); } +struct cmos_read_alarm_callback_param { + struct cmos_rtc *cmos; + struct rtc_time *time; + unsigned char rtc_control; +}; + +static void cmos_read_alarm_callback(unsigned char __always_unused seconds, + void *param_in) +{ + struct cmos_read_alarm_callback_param *p = + (struct cmos_read_alarm_callback_param *)param_in; + struct rtc_time *time = p->time; + + time->tm_sec = CMOS_READ(RTC_SECONDS_ALARM); + time->tm_min = CMOS_READ(RTC_MINUTES_ALARM); + time->tm_hour = CMOS_READ(RTC_HOURS_ALARM); + + if (p->cmos->day_alrm) { + /* ignore upper bits on readback per ACPI spec */ + time->tm_mday = CMOS_READ(p->cmos->day_alrm) & 0x3f; + if (!time->tm_mday) + time->tm_mday = -1; + + if (p->cmos->mon_alrm) { + time->tm_mon = CMOS_READ(p->cmos->mon_alrm); + if (!time->tm_mon) + time->tm_mon = -1; + } + } + + p->rtc_control = CMOS_READ(RTC_CONTROL); +} + static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) { struct cmos_rtc 
*cmos = dev_get_drvdata(dev); - unsigned char rtc_control; + struct cmos_read_alarm_callback_param p = { + .cmos = cmos, + .time = &t->time, + }; /* This not only a rtc_op, but also called directly */ if (!is_valid_irq(cmos->irq)) @@ -263,28 +299,18 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) * the future. */ - spin_lock_irq(&rtc_lock); - t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM); - t->time.tm_min = CMOS_READ(RTC_MINUTES_ALARM); - t->time.tm_hour = CMOS_READ(RTC_HOURS_ALARM); + /* Some Intel chipsets disconnect the alarm registers when the clock + * update is in progress - during this time reads return bogus values + * and writes may fail silently. See for example "7th Generation Intel® + * Processor Family I/O for U/Y Platforms [...] Datasheet", section + * 27.7.1 + * + * Use the mc146818_avoid_UIP() function to avoid this. + */ + if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p)) + return -EIO; - if (cmos->day_alrm) { - /* ignore upper bits on readback per ACPI spec */ - t->time.tm_mday = CMOS_READ(cmos->day_alrm) & 0x3f; - if (!t->time.tm_mday) - t->time.tm_mday = -1; - - if (cmos->mon_alrm) { - t->time.tm_mon = CMOS_READ(cmos->mon_alrm); - if (!t->time.tm_mon) - t->time.tm_mon = -1; - } - } - - rtc_control = CMOS_READ(RTC_CONTROL); - spin_unlock_irq(&rtc_lock); - - if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { if (((unsigned)t->time.tm_sec) < 0x60) t->time.tm_sec = bcd2bin(t->time.tm_sec); else @@ -313,7 +339,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) } } - t->enabled = !!(rtc_control & RTC_AIE); + t->enabled = !!(p.rtc_control & RTC_AIE); t->pending = 0; return 0; From 17293d630f5f88355b43712888208945581cdc87 Mon Sep 17 00:00:00 2001 From: Xiaofei Tan Date: Wed, 3 Feb 2021 20:39:36 +0800 Subject: [PATCH 1225/1477] rtc: cmos: Replace spin_lock_irqsave with spin_lock in hard IRQ [ Upstream commit 
6950d046eb6eabbc271fda416460c05f7a85698a ] It is redundant to do irqsave and irqrestore in hardIRQ context, which is already an irq-disabled context. Signed-off-by: Xiaofei Tan Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/1612355981-6764-2-git-send-email-tanxiaofei@huawei.com Signed-off-by: Sasha Levin --- drivers/rtc/rtc-cmos.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index d419eb988b22..21f2bdd025b6 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -704,11 +704,10 @@ static struct cmos_rtc cmos_rtc; static irqreturn_t cmos_interrupt(int irq, void *p) { - unsigned long flags; u8 irqstat; u8 rtc_control; - spin_lock_irqsave(&rtc_lock, flags); + spin_lock(&rtc_lock); /* When the HPET interrupt handler calls us, the interrupt * status is passed as arg1 instead of the irq number. But @@ -742,7 +741,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p) hpet_mask_rtc_irq_bit(RTC_AIE); CMOS_READ(RTC_INTR_FLAGS); } - spin_unlock_irqrestore(&rtc_lock, flags); + spin_unlock(&rtc_lock); if (is_intr(irqstat)) { rtc_update_irq(p, 1, irqstat); From db44a9443e586077594808a09820dfe6f408b8b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:46:15 +0100 Subject: [PATCH 1226/1477] rtc: mc146818: Reduce spinlock section in mc146818_set_time() [ Upstream commit dcf257e92622ba0e25fdc4b6699683e7ae67e2a1 ] No need to hold the lock and disable interrupts for doing math.
Signed-off-by: Thomas Gleixner Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201206220541.709243630@linutronix.de Signed-off-by: Sasha Levin --- drivers/rtc/rtc-mc146818-lib.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 46527a5d3912..1ca866461d10 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -249,7 +249,6 @@ int mc146818_set_time(struct rtc_time *time) if (yrs > 255) /* They are unsigned */ return -EINVAL; - spin_lock_irqsave(&rtc_lock, flags); #ifdef CONFIG_MACH_DECSTATION real_yrs = yrs; leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) || @@ -278,10 +277,8 @@ int mc146818_set_time(struct rtc_time *time) /* These limits and adjustments are independent of * whether the chip is in binary mode or not. */ - if (yrs > 169) { - spin_unlock_irqrestore(&rtc_lock, flags); + if (yrs > 169) return -EINVAL; - } if (yrs >= 100) yrs -= 100; @@ -297,6 +294,7 @@ int mc146818_set_time(struct rtc_time *time) century = bin2bcd(century); } + spin_lock_irqsave(&rtc_lock, flags); save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); From 49e07c0768dbebff672ee1834eff9680fc6277bf Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 22 Nov 2022 09:16:59 +0000 Subject: [PATCH 1227/1477] xen/netback: Ensure protocol headers don't fall in the non-linear area [ Upstream commit ad7f402ae4f466647c3a669b8a6f3e5d4271c84a ] In some cases, the frontend may send a packet where the protocol headers are spread across multiple slots. This would result in netback creating an skb where the protocol headers spill over into the non-linear area. Some drivers and NICs don't handle this properly resulting in an interface reset or worse. This issue was introduced by the removal of an unconditional skb pull in the tx path to improve performance. 
Fix this without reintroducing the pull by setting up grant copy ops for as many slots as needed to reach the XEN_NETBACK_TX_COPY_LEN size. Adjust the rest of the code to handle multiple copy operations per skb. This is XSA-423 / CVE-2022-3643. Fixes: 7e5d7753956b ("xen-netback: remove unconditional __pskb_pull_tail() in guest Tx path") Signed-off-by: Ross Lagerwall Reviewed-by: Paul Durrant Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/net/xen-netback/netback.c | 223 ++++++++++++++++-------------- 1 file changed, 123 insertions(+), 100 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index b0cbc7fead74..06fd61b71d37 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -330,10 +330,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue, struct xenvif_tx_cb { - u16 pending_idx; + u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1]; + u8 copy_count; }; #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) +#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i]) +#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count) static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue, u16 pending_idx, @@ -368,31 +371,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) return skb; } -static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, - struct sk_buff *skb, - struct xen_netif_tx_request *txp, - struct gnttab_map_grant_ref *gop, - unsigned int frag_overflow, - struct sk_buff *nskb) +static void xenvif_get_requests(struct xenvif_queue *queue, + struct sk_buff *skb, + struct xen_netif_tx_request *first, + struct xen_netif_tx_request *txfrags, + unsigned *copy_ops, + unsigned *map_ops, + unsigned int frag_overflow, + struct sk_buff *nskb, + unsigned int extra_count, + unsigned int data_len) { struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; - 
u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; - int start; + u16 pending_idx; pending_ring_idx_t index; unsigned int nr_slots; + struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops; + struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops; + struct xen_netif_tx_request *txp = first; - nr_slots = shinfo->nr_frags; + nr_slots = shinfo->nr_frags + 1; - /* Skip first skb fragment if it is on same page as header fragment. */ - start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); + copy_count(skb) = 0; - for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; - shinfo->nr_frags++, txp++, gop++) { + /* Create copy ops for exactly data_len bytes into the skb head. */ + __skb_put(skb, data_len); + while (data_len > 0) { + int amount = data_len > txp->size ? txp->size : data_len; + + cop->source.u.ref = txp->gref; + cop->source.domid = queue->vif->domid; + cop->source.offset = txp->offset; + + cop->dest.domid = DOMID_SELF; + cop->dest.offset = (offset_in_page(skb->data + + skb_headlen(skb) - + data_len)) & ~XEN_PAGE_MASK; + cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb) + - data_len); + + cop->len = amount; + cop->flags = GNTCOPY_source_gref; + + index = pending_index(queue->pending_cons); + pending_idx = queue->pending_ring[index]; + callback_param(queue, pending_idx).ctx = NULL; + copy_pending_idx(skb, copy_count(skb)) = pending_idx; + copy_count(skb)++; + + cop++; + data_len -= amount; + + if (amount == txp->size) { + /* The copy op covered the full tx_request */ + + memcpy(&queue->pending_tx_info[pending_idx].req, + txp, sizeof(*txp)); + queue->pending_tx_info[pending_idx].extra_count = + (txp == first) ? extra_count : 0; + + if (txp == first) + txp = txfrags; + else + txp++; + queue->pending_cons++; + nr_slots--; + } else { + /* The copy op partially covered the tx_request. + * The remainder will be mapped. 
+ */ + txp->offset += amount; + txp->size -= amount; + } + } + + for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; + shinfo->nr_frags++, gop++) { index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; - xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); + xenvif_tx_create_map_op(queue, pending_idx, txp, + txp == first ? extra_count : 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + + if (txp == first) + txp = txfrags; + else + txp++; } if (frag_overflow) { @@ -413,7 +478,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que skb_shinfo(skb)->frag_list = nskb; } - return gop; + (*copy_ops) = cop - queue->tx_copy_ops; + (*map_ops) = gop - queue->tx_map_ops; } static inline void xenvif_grant_handle_set(struct xenvif_queue *queue, @@ -449,7 +515,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct gnttab_copy **gopp_copy) { struct gnttab_map_grant_ref *gop_map = *gopp_map; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; + u16 pending_idx; /* This always points to the shinfo of the skb being checked, which * could be either the first or the one on the frag_list */ @@ -460,24 +526,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; const bool sharedslot = nr_frags && - frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; + frag_get_pending_idx(&shinfo->frags[0]) == + copy_pending_idx(skb, copy_count(skb) - 1); int i, err; - /* Check status of header. */ - err = (*gopp_copy)->status; - if (unlikely(err)) { - if (net_ratelimit()) - netdev_dbg(queue->vif->dev, - "Grant copy of header failed! 
status: %d pending_idx: %u ref: %u\n", - (*gopp_copy)->status, - pending_idx, - (*gopp_copy)->source.u.ref); - /* The first frag might still have this slot mapped */ - if (!sharedslot) - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_ERROR); + for (i = 0; i < copy_count(skb); i++) { + int newerr; + + /* Check status of header. */ + pending_idx = copy_pending_idx(skb, i); + + newerr = (*gopp_copy)->status; + if (likely(!newerr)) { + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); + } else { + err = newerr; + if (net_ratelimit()) + netdev_dbg(queue->vif->dev, + "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", + (*gopp_copy)->status, + pending_idx, + (*gopp_copy)->source.u.ref); + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_ERROR); + } + (*gopp_copy)++; } - (*gopp_copy)++; check_frags: for (i = 0; i < nr_frags; i++, gop_map++) { @@ -524,14 +603,6 @@ check_frags: if (err) continue; - /* First error: if the header haven't shared a slot with the - * first frag, release it as well. - */ - if (!sharedslot) - xenvif_idx_release(queue, - XENVIF_TX_CB(skb)->pending_idx, - XEN_NETIF_RSP_OKAY); - /* Invalidate preceding fragments of this skb. */ for (j = 0; j < i; j++) { pending_idx = frag_get_pending_idx(&shinfo->frags[j]); @@ -801,7 +872,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, unsigned *copy_ops, unsigned *map_ops) { - struct gnttab_map_grant_ref *gop = queue->tx_map_ops; struct sk_buff *skb, *nskb; int ret; unsigned int frag_overflow; @@ -883,8 +953,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, continue; } + data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ? 
+ XEN_NETBACK_TX_COPY_LEN : txreq.size; + ret = xenvif_count_requests(queue, &txreq, extra_count, txfrags, work_to_do); + if (unlikely(ret < 0)) break; @@ -910,9 +984,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, index = pending_index(queue->pending_cons); pending_idx = queue->pending_ring[index]; - data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN && - ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? - XEN_NETBACK_TX_COPY_LEN : txreq.size; + if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size) + data_len = txreq.size; skb = xenvif_alloc_skb(data_len); if (unlikely(skb == NULL)) { @@ -923,8 +996,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, } skb_shinfo(skb)->nr_frags = ret; - if (data_len < txreq.size) - skb_shinfo(skb)->nr_frags++; /* At this point shinfo->nr_frags is in fact the number of * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. */ @@ -986,54 +1057,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, type); } - XENVIF_TX_CB(skb)->pending_idx = pending_idx; - - __skb_put(skb, data_len); - queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; - queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid; - queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset; - - queue->tx_copy_ops[*copy_ops].dest.u.gmfn = - virt_to_gfn(skb->data); - queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; - queue->tx_copy_ops[*copy_ops].dest.offset = - offset_in_page(skb->data) & ~XEN_PAGE_MASK; - - queue->tx_copy_ops[*copy_ops].len = data_len; - queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; - - (*copy_ops)++; - - if (data_len < txreq.size) { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - pending_idx); - xenvif_tx_create_map_op(queue, pending_idx, &txreq, - extra_count, gop); - gop++; - } else { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - INVALID_PENDING_IDX); - memcpy(&queue->pending_tx_info[pending_idx].req, - &txreq, sizeof(txreq)); - 
queue->pending_tx_info[pending_idx].extra_count = - extra_count; - } - - queue->pending_cons++; - - gop = xenvif_get_requests(queue, skb, txfrags, gop, - frag_overflow, nskb); + xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops, + map_ops, frag_overflow, nskb, extra_count, + data_len); __skb_queue_tail(&queue->tx_queue, skb); queue->tx.req_cons = idx; - if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) || + if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) || (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops))) break; } - (*map_ops) = gop - queue->tx_map_ops; return; } @@ -1112,9 +1148,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) { struct xen_netif_tx_request *txp; u16 pending_idx; - unsigned data_len; - pending_idx = XENVIF_TX_CB(skb)->pending_idx; + pending_idx = copy_pending_idx(skb, 0); txp = &queue->pending_tx_info[pending_idx].req; /* Check the remap error code. */ @@ -1133,18 +1168,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) continue; } - data_len = skb->len; - callback_param(queue, pending_idx).ctx = NULL; - if (data_len < txp->size) { - /* Append the packet payload as a fragment. */ - txp->offset += data_len; - txp->size -= data_len; - } else { - /* Schedule a response immediately. 
*/ - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_OKAY); - } - if (txp->flags & XEN_NETTXF_csum_blank) skb->ip_summed = CHECKSUM_PARTIAL; else if (txp->flags & XEN_NETTXF_data_validated) @@ -1330,7 +1353,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) /* Called after netfront has transmitted */ int xenvif_tx_action(struct xenvif_queue *queue, int budget) { - unsigned nr_mops, nr_cops = 0; + unsigned nr_mops = 0, nr_cops = 0; int work_done, ret; if (unlikely(!tx_work_todo(queue))) From 3eecd2bc10e0af4fe8d913ff5a57927abc4f81bb Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 8 Jun 2022 06:37:26 +0200 Subject: [PATCH 1228/1477] xen/netback: do some code cleanup [ Upstream commit 5834e72eda0b7e5767eb107259d98eef19ebd11f ] Remove some unused macros and functions, make local functions static. Signed-off-by: Juergen Gross Acked-by: Wei Liu Link: https://lore.kernel.org/r/20220608043726.9380-1-jgross@suse.com Signed-off-by: Jakub Kicinski Stable-dep-of: 74e7e1efdad4 ("xen/netback: don't call kfree_skb() with interrupts disabled") Signed-off-by: Sasha Levin --- drivers/net/xen-netback/common.h | 12 ------------ drivers/net/xen-netback/interface.c | 16 +--------------- drivers/net/xen-netback/netback.c | 4 +++- drivers/net/xen-netback/rx.c | 2 +- 4 files changed, 5 insertions(+), 29 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 6a9178896c90..945647128c0e 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -48,7 +48,6 @@ #include typedef unsigned int pending_ring_idx_t; -#define INVALID_PENDING_RING_IDX (~0U) struct pending_tx_info { struct xen_netif_tx_request req; /* tx request */ @@ -82,8 +81,6 @@ struct xenvif_rx_meta { /* Discriminate from any valid pending_idx value. 
*/ #define INVALID_PENDING_IDX 0xFFFF -#define MAX_BUFFER_OFFSET XEN_PAGE_SIZE - #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE /* The maximum number of frags is derived from the size of a grant (same @@ -367,11 +364,6 @@ void xenvif_free(struct xenvif *vif); int xenvif_xenbus_init(void); void xenvif_xenbus_fini(void); -int xenvif_schedulable(struct xenvif *vif); - -int xenvif_queue_stopped(struct xenvif_queue *queue); -void xenvif_wake_queue(struct xenvif_queue *queue); - /* (Un)Map communication rings. */ void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue); int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, @@ -394,7 +386,6 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); -void xenvif_rx_action(struct xenvif_queue *queue); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); void xenvif_carrier_on(struct xenvif *vif); @@ -402,9 +393,6 @@ void xenvif_carrier_on(struct xenvif *vif); /* Callback from stack when TX packet can be released */ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success); -/* Unmap a pending page and release it back to the guest */ -void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); - static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue) { return MAX_PENDING_REQS - diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 7ce9807fc24c..645a804ab788 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -70,7 +70,7 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) wake_up(&queue->dealloc_wq); } -int xenvif_schedulable(struct xenvif *vif) +static int xenvif_schedulable(struct xenvif *vif) { return netif_running(vif->dev) && test_bit(VIF_STATUS_CONNECTED, &vif->status) && @@ -178,20 +178,6 @@ irqreturn_t xenvif_interrupt(int irq, 
void *dev_id) return IRQ_HANDLED; } -int xenvif_queue_stopped(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - unsigned int id = queue->id; - return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id)); -} - -void xenvif_wake_queue(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - unsigned int id = queue->id; - netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); -} - static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 06fd61b71d37..fed0f7458e18 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -112,6 +112,8 @@ static void make_tx_response(struct xenvif_queue *queue, s8 st); static void push_tx_responses(struct xenvif_queue *queue); +static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); + static inline int tx_work_todo(struct xenvif_queue *queue); static inline unsigned long idx_to_pfn(struct xenvif_queue *queue, @@ -1440,7 +1442,7 @@ static void push_tx_responses(struct xenvif_queue *queue) notify_remote_via_irq(queue->tx_irq); } -void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) +static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) { int ret; struct gnttab_unmap_grant_ref tx_unmap_op; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index a0335407be42..932762177110 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -486,7 +486,7 @@ static void xenvif_rx_skb(struct xenvif_queue *queue) #define RX_BATCH_SIZE 64 -void xenvif_rx_action(struct xenvif_queue *queue) +static void xenvif_rx_action(struct xenvif_queue *queue) { struct sk_buff_head completed_skbs; unsigned int work_done = 0; From 83632fc41449c480f2d0193683ec202caaa186c9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 6 Dec 2022 08:54:24 +0100 Subject: [PATCH 
1229/1477] xen/netback: don't call kfree_skb() with interrupts disabled [ Upstream commit 74e7e1efdad45580cc3839f2a155174cf158f9b5 ] It is not allowed to call kfree_skb() from hardware interrupt context or with interrupts being disabled. So remove kfree_skb() from the spin_lock_irqsave() section and use the already existing "drop" label in xenvif_start_xmit() for dropping the SKB. At the same time replace the dev_kfree_skb() call there with a call of dev_kfree_skb_any(), as xenvif_start_xmit() can be called with disabled interrupts. This is XSA-424 / CVE-2022-42328 / CVE-2022-42329. Fixes: be81992f9086 ("xen/netback: don't queue unlimited number of packages") Reported-by: Yang Yingliang Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/net/xen-netback/common.h | 2 +- drivers/net/xen-netback/interface.c | 6 ++++-- drivers/net/xen-netback/rx.c | 8 +++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 945647128c0e..1ba974969216 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -386,7 +386,7 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); void xenvif_carrier_on(struct xenvif *vif); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 645a804ab788..97cf5bc48902 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -255,14 +255,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) skb_clear_hash(skb); - xenvif_rx_queue_tail(queue, skb); + if 
(!xenvif_rx_queue_tail(queue, skb)) + goto drop; + xenvif_kick_thread(queue); return NETDEV_TX_OK; drop: vif->dev->stats.tx_dropped++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 932762177110..0ba754ebc5ba 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) return false; } -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) { unsigned long flags; + bool ret = true; spin_lock_irqsave(&queue->rx_queue.lock, flags); @@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); - kfree_skb(skb); - queue->vif->dev->stats.rx_dropped++; + ret = false; } else { if (skb_queue_empty(&queue->rx_queue)) xenvif_update_needed_slots(queue, skb); @@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + + return ret; } static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) From abfb8ae69bdc5cafcc9fef44519789d6fbb6087c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 7 Dec 2022 13:04:34 +0000 Subject: [PATCH 1230/1477] media: videobuf2-core: take mmap_lock in vb2_get_unmapped_area() [ Upstream commit 098e5edc5d048a8df8691fd9fde895af100be42b ] While vb2_mmap took the mmap_lock mutex, vb2_get_unmapped_area didn't. Add this. Also take this opportunity to move the 'q->memory != VB2_MEMORY_MMAP' check and vb2_fileio_is_active() check into __find_plane_by_offset() so both vb2_mmap and vb2_get_unmapped_area do the same checks. 
Since q->memory is checked while mmap_lock is held, also take that lock in reqbufs and create_bufs when it is set, and set it back to MEMORY_UNKNOWN on error. Fixes: f035eb4e976e ("[media] videobuf2: fix lockdep warning") Signed-off-by: Hans Verkuil Acked-by: Tomasz Figa Reviewed-by: Ricardo Ribalda Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- .../media/common/videobuf2/videobuf2-core.c | 102 +++++++++++++----- 1 file changed, 73 insertions(+), 29 deletions(-) diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 72350343a56a..3bafde87a125 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -787,7 +787,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, num_buffers = max_t(unsigned int, *count, q->min_buffers_needed); num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME); memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); + /* + * Set this now to ensure that drivers see the correct q->memory value + * in the queue_setup op. + */ + mutex_lock(&q->mmap_lock); q->memory = memory; + mutex_unlock(&q->mmap_lock); /* * Ask the driver how many buffers and planes per buffer it requires. 
@@ -796,22 +802,27 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) - return ret; + goto error; /* Check that driver has set sane values */ - if (WARN_ON(!num_planes)) - return -EINVAL; + if (WARN_ON(!num_planes)) { + ret = -EINVAL; + goto error; + } for (i = 0; i < num_planes; i++) - if (WARN_ON(!plane_sizes[i])) - return -EINVAL; + if (WARN_ON(!plane_sizes[i])) { + ret = -EINVAL; + goto error; + } /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); if (allocated_buffers == 0) { dprintk(q, 1, "memory allocation failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error; } /* @@ -852,7 +863,8 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' - * from q->num_buffers. + * from q->num_buffers and it will reset q->memory to + * VB2_MEMORY_UNKNOWN. 
*/ __vb2_queue_free(q, allocated_buffers); mutex_unlock(&q->mmap_lock); @@ -868,6 +880,12 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, q->waiting_for_buffers = !q->is_output; return 0; + +error: + mutex_lock(&q->mmap_lock); + q->memory = VB2_MEMORY_UNKNOWN; + mutex_unlock(&q->mmap_lock); + return ret; } EXPORT_SYMBOL_GPL(vb2_core_reqbufs); @@ -878,6 +896,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, { unsigned int num_planes = 0, num_buffers, allocated_buffers; unsigned plane_sizes[VB2_MAX_PLANES] = { }; + bool no_previous_buffers = !q->num_buffers; int ret; if (q->num_buffers == VB2_MAX_FRAME) { @@ -885,13 +904,19 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, return -ENOBUFS; } - if (!q->num_buffers) { + if (no_previous_buffers) { if (q->waiting_in_dqbuf && *count) { dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); return -EBUSY; } memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); + /* + * Set this now to ensure that drivers see the correct q->memory + * value in the queue_setup op. + */ + mutex_lock(&q->mmap_lock); q->memory = memory; + mutex_unlock(&q->mmap_lock); q->waiting_for_buffers = !q->is_output; } else { if (q->memory != memory) { @@ -914,14 +939,15 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) - return ret; + goto error; /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); if (allocated_buffers == 0) { dprintk(q, 1, "memory allocation failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error; } /* @@ -952,7 +978,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' - * from q->num_buffers. 
+ * from q->num_buffers and it will reset q->memory to + * VB2_MEMORY_UNKNOWN. */ __vb2_queue_free(q, allocated_buffers); mutex_unlock(&q->mmap_lock); @@ -967,6 +994,14 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, *count = allocated_buffers; return 0; + +error: + if (no_previous_buffers) { + mutex_lock(&q->mmap_lock); + q->memory = VB2_MEMORY_UNKNOWN; + mutex_unlock(&q->mmap_lock); + } + return ret; } EXPORT_SYMBOL_GPL(vb2_core_create_bufs); @@ -2120,6 +2155,22 @@ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off, struct vb2_buffer *vb; unsigned int buffer, plane; + /* + * Sanity checks to ensure the lock is held, MEMORY_MMAP is + * used and fileio isn't active. + */ + lockdep_assert_held(&q->mmap_lock); + + if (q->memory != VB2_MEMORY_MMAP) { + dprintk(q, 1, "queue is not currently set up for mmap\n"); + return -EINVAL; + } + + if (vb2_fileio_is_active(q)) { + dprintk(q, 1, "file io in progress\n"); + return -EBUSY; + } + /* * Go over all buffers and their planes, comparing the given offset * with an offset assigned to each plane. If a match is found, @@ -2219,11 +2270,6 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) int ret; unsigned long length; - if (q->memory != VB2_MEMORY_MMAP) { - dprintk(q, 1, "queue is not currently set up for mmap\n"); - return -EINVAL; - } - /* * Check memory area access mode. */ @@ -2245,14 +2291,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) mutex_lock(&q->mmap_lock); - if (vb2_fileio_is_active(q)) { - dprintk(q, 1, "mmap: file io in progress\n"); - ret = -EBUSY; - goto unlock; - } - /* - * Find the plane corresponding to the offset passed by userspace. + * Find the plane corresponding to the offset passed by userspace. This + * will return an error if not MEMORY_MMAP or file I/O is in progress. 
*/ ret = __find_plane_by_offset(q, off, &buffer, &plane); if (ret) @@ -2305,22 +2346,25 @@ unsigned long vb2_get_unmapped_area(struct vb2_queue *q, void *vaddr; int ret; - if (q->memory != VB2_MEMORY_MMAP) { - dprintk(q, 1, "queue is not currently set up for mmap\n"); - return -EINVAL; - } + mutex_lock(&q->mmap_lock); /* - * Find the plane corresponding to the offset passed by userspace. + * Find the plane corresponding to the offset passed by userspace. This + * will return an error if not MEMORY_MMAP or file I/O is in progress. */ ret = __find_plane_by_offset(q, off, &buffer, &plane); if (ret) - return ret; + goto unlock; vb = q->bufs[buffer]; vaddr = vb2_plane_vaddr(vb, plane); + mutex_unlock(&q->mmap_lock); return vaddr ? (unsigned long)vaddr : -EINVAL; + +unlock: + mutex_unlock(&q->mmap_lock); + return ret; } EXPORT_SYMBOL_GPL(vb2_get_unmapped_area); #endif From 043b2bc96ca2a63d65c0683650ba4d491e8f0f24 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Mon, 5 Dec 2022 16:23:27 +0100 Subject: [PATCH 1231/1477] Revert "ARM: dts: imx7: Fix NAND controller size-cells" commit ef19964da8a668c683f1d38274f6fb756e047945 upstream. This reverts commit 753395ea1e45c724150070b5785900b6a44bd5fb. It introduced a boot regression on colibri-imx7, and potentially any other i.MX7 boards with MTD partition list generated into the fdt by U-Boot. While the commit we are reverting here is not obviously wrong, it fixes only a dt binding checker warning that is non-functional, while it introduces a boot regression and there is no obvious fix ready. 
Fixes: 753395ea1e45 ("ARM: dts: imx7: Fix NAND controller size-cells") Signed-off-by: Francesco Dolcini Reviewed-by: Miquel Raynal Acked-by: Marek Vasut Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/Y4dgBTGNWpM6SQXI@francesco-nb.int.toradex.com/ Link: https://lore.kernel.org/all/20221205144917.6514168a@xps-13/ Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx7s.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index e4ff47110a96..9e1b0af0aa43 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1221,10 +1221,10 @@ clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>; }; - gpmi: nand-controller@33002000 { + gpmi: nand-controller@33002000{ compatible = "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <0>; + #size-cells = <1>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = ; From 32f01f0306a98629508f84d7ef0d1d037bc274a2 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 16 Nov 2022 15:07:22 +0000 Subject: [PATCH 1232/1477] media: v4l2-dv-timings.c: fix too strict blanking sanity checks commit 5eef2141776da02772c44ec406d6871a790761ee upstream. Sanity checks were added to verify the v4l2_bt_timings blanking fields in order to avoid integer overflows when userspace passes weird values. But that assumed that userspace would correctly fill in the front porch, backporch and sync values, but sometimes all you know is the total blanking, which is then assigned to just one of these fields. And that can fail with these checks. So instead set a maximum for the total horizontal and vertical blanking and check that each field remains below that. That is still sufficient to avoid integer overflows, but it also allows for more flexibility in how userspace fills in these fields. 
Signed-off-by: Hans Verkuil Fixes: 4b6d66a45ed3 ("media: v4l2-dv-timings: add sanity checks for blanking values") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-dv-timings.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index 003c32fed3f7..942d0005c55e 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -145,6 +145,8 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t, const struct v4l2_bt_timings *bt = &t->bt; const struct v4l2_bt_timings_cap *cap = &dvcap->bt; u32 caps = cap->capabilities; + const u32 max_vert = 10240; + u32 max_hor = 3 * bt->width; if (t->type != V4L2_DV_BT_656_1120) return false; @@ -166,14 +168,20 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t, if (!bt->interlaced && (bt->il_vbackporch || bt->il_vsync || bt->il_vfrontporch)) return false; - if (bt->hfrontporch > 2 * bt->width || - bt->hsync > 1024 || bt->hbackporch > 1024) + /* + * Some video receivers cannot properly separate the frontporch, + * backporch and sync values, and instead they only have the total + * blanking. That can be assigned to any of these three fields. + * So just check that none of these are way out of range. 
+ */ + if (bt->hfrontporch > max_hor || + bt->hsync > max_hor || bt->hbackporch > max_hor) return false; - if (bt->vfrontporch > 4096 || - bt->vsync > 128 || bt->vbackporch > 4096) + if (bt->vfrontporch > max_vert || + bt->vsync > max_vert || bt->vbackporch > max_vert) return false; - if (bt->interlaced && (bt->il_vfrontporch > 4096 || - bt->il_vsync > 128 || bt->il_vbackporch > 4096)) + if (bt->interlaced && (bt->il_vfrontporch > max_vert || + bt->il_vsync > max_vert || bt->il_vbackporch > max_vert)) return false; return fnc == NULL || fnc(t, fnc_handle); } From f1f7f36cf682fa59db15e2089039a2eeb58ff2ad Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 7 Dec 2022 16:53:15 -1000 Subject: [PATCH 1233/1477] memcg: fix possible use-after-free in memcg_write_event_control() commit 4a7ba45b1a435e7097ca0f79a847d0949d0eb088 upstream. memcg_write_event_control() accesses the dentry->d_name of the specified control fd to route the write call. As a cgroup interface file can't be renamed, it's safe to access d_name as long as the specified file is a regular cgroup file. Also, as these cgroup interface files can't be removed before the directory, it's safe to access the parent too. Prior to 347c4a874710 ("memcg: remove cgroup_event->cft"), there was a call to __file_cft() which verified that the specified file is a regular cgroupfs file before further accesses. The cftype pointer returned from __file_cft() was no longer necessary and the commit inadvertently dropped the file type check with it allowing any file to slip through. With the invariants broken, the d_name and parent accesses can now race against renames and removals of arbitrary files and cause use-after-free's. Fix the bug by resurrecting the file type check in __file_cft(). Now that cgroupfs is implemented through kernfs, checking the file operations needs to go through a layer of indirection. Instead, let's check the superblock and dentry type.
Link: https://lkml.kernel.org/r/Y5FRm/cfcKPGzWwl@slm.duckdns.org Fixes: 347c4a874710 ("memcg: remove cgroup_event->cft") Signed-off-by: Tejun Heo Reported-by: Jann Horn Acked-by: Roman Gushchin Acked-by: Johannes Weiner Cc: Linus Torvalds Cc: Michal Hocko Cc: Muchun Song Cc: Shakeel Butt Cc: [3.14+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/cgroup.h | 1 + kernel/cgroup/cgroup-internal.h | 1 - mm/memcontrol.c | 15 +++++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 618838c48313..959b370733f0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -68,6 +68,7 @@ struct css_task_iter { struct list_head iters_node; /* css_set->task_iters */ }; +extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 6e36e854b512..d8fcc139ac05 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -169,7 +169,6 @@ extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; -extern struct file_system_type cgroup_fs_type; /* iterate across the hierarchies */ #define for_each_root(root) \ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 92ab00877718..c62d997c8ca1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4866,6 +4866,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, unsigned int efd, cfd; struct fd efile; struct fd cfile; + struct dentry *cdentry; const char *name; char *endp; int ret; @@ -4916,6 +4917,16 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, if (ret < 0) goto out_put_cfile; + /* + * The control file must be a regular cgroup1 file. 
As a regular cgroup + * file can't be renamed, it's safe to access its name afterwards. + */ + cdentry = cfile.file->f_path.dentry; + if (cdentry->d_sb->s_type != &cgroup_fs_type || !d_is_reg(cdentry)) { + ret = -EINVAL; + goto out_put_cfile; + } + /* * Determine the event callbacks and set them in @event. This used * to be done via struct cftype but cgroup core no longer knows @@ -4924,7 +4935,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * * DO NOT ADD NEW FILES. */ - name = cfile.file->f_path.dentry->d_name.name; + name = cdentry->d_name.name; if (!strcmp(name, "memory.usage_in_bytes")) { event->register_event = mem_cgroup_usage_register_event; @@ -4948,7 +4959,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * automatically removed on cgroup destruction but the removal is * asynchronous, so take an extra ref on @css. */ - cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent, + cfile_css = css_tryget_online_from_dir(cdentry->d_parent, &memory_cgrp_subsys); ret = -EINVAL; if (IS_ERR(cfile_css)) From f1cf856123ceb766c49967ec79b841030fa1741f Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 6 Dec 2022 22:00:53 -0800 Subject: [PATCH 1234/1477] mm/gup: fix gup_pud_range() for dax commit fcd0ccd836ffad73d98a66f6fea7b16f735ea920 upstream. For dax pud, pud_huge() returns true on x86. So the function works as long as hugetlb is configured. However, dax doesn't depend on hugetlb. Commit 414fd080d125 ("mm/gup: fix gup_pmd_range() for dax") fixed devmap-backed huge PMDs, but missed devmap-backed huge PUDs. Fix this as well. This fixes the below kernel panic: general protection fault, probably for non-canonical address 0x69e7c000cc478: 0000 [#1] SMP < snip > Call Trace: get_user_pages_fast+0x1f/0x40 iov_iter_get_pages+0xc6/0x3b0 ? mempool_alloc+0x5d/0x170 bio_iov_iter_get_pages+0x82/0x4e0 ? bvec_alloc+0x91/0xc0 ? bio_alloc_bioset+0x19a/0x2a0 blkdev_direct_IO+0x282/0x480 ? 
__io_complete_rw_common+0xc0/0xc0 ? filemap_range_has_page+0x82/0xc0 generic_file_direct_write+0x9d/0x1a0 ? inode_update_time+0x24/0x30 __generic_file_write_iter+0xbd/0x1e0 blkdev_write_iter+0xb4/0x150 ? io_import_iovec+0x8d/0x340 io_write+0xf9/0x300 io_issue_sqe+0x3c3/0x1d30 ? sysvec_reschedule_ipi+0x6c/0x80 __io_queue_sqe+0x33/0x240 ? fget+0x76/0xa0 io_submit_sqes+0xe6a/0x18d0 ? __fget_light+0xd1/0x100 __x64_sys_io_uring_enter+0x199/0x880 ? __context_tracking_enter+0x1f/0x70 ? irqentry_exit_to_user_mode+0x24/0x30 ? irqentry_exit+0x1d/0x30 ? __context_tracking_exit+0xe/0x70 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x61/0xcb RIP: 0033:0x7fc97c11a7be < snip > ---[ end trace 48b2e0e67debcaeb ]--- RIP: 0010:internal_get_user_pages_fast+0x340/0x990 < snip > Kernel panic - not syncing: Fatal exception Kernel Offset: disabled Link: https://lkml.kernel.org/r/1670392853-28252-1-git-send-email-ssengar@linux.microsoft.com Fixes: 414fd080d125 ("mm/gup: fix gup_pmd_range() for dax") Signed-off-by: John Starks Signed-off-by: Saurabh Sengar Cc: Jan Kara Cc: Yu Zhao Cc: Jason Gunthorpe Cc: John Hubbard Cc: David Hildenbrand Cc: Dan Williams Cc: Alistair Popple Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index b47c751df069..bd2c011c7ca9 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2564,7 +2564,7 @@ static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned lo next = pud_addr_end(addr, end); if (unlikely(!pud_present(pud))) return 0; - if (unlikely(pud_huge(pud))) { + if (unlikely(pud_huge(pud) || pud_devmap(pud))) { if (!gup_huge_pud(pud, pudp, addr, next, flags, pages, nr)) return 0; From 380d183e998b597bbba82cfef1c07b3e0327d860 Mon Sep 17 00:00:00 2001 From: Ismael Ferreras Morezuelas Date: Sat, 29 Oct 2022 22:24:53 +0200 Subject: [PATCH 1235/1477] Bluetooth: btusb: Add debug message for CSR controllers commit 
955aebd445e2b49622f2184b7abb82b05c060549 upstream. The rationale of showing this is that it's potentially critical information to diagnose and find more CSR compatibility bugs in the future and it will save a lot of headaches. Given that clones come from a wide array of vendors (some are actually Barrot, some are something else) and these numbers are what let us find differences between actual and fake ones, it will be immensely helpful to scour the Internet looking for this pattern and building an actual database to find correlations and improve the checks. Cc: stable@vger.kernel.org Cc: Hans de Goede Signed-off-by: Ismael Ferreras Morezuelas Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 6efd981979bd..54001ad5de9f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1833,6 +1833,11 @@ static int btusb_setup_csr(struct hci_dev *hdev) rp = (struct hci_rp_read_local_version *)skb->data; + bt_dev_info(hdev, "CSR: Setting up dongle with HCI ver=%u rev=%04x; LMP ver=%u subver=%04x; manufacturer=%u", + le16_to_cpu(rp->hci_ver), le16_to_cpu(rp->hci_rev), + le16_to_cpu(rp->lmp_ver), le16_to_cpu(rp->lmp_subver), + le16_to_cpu(rp->manufacturer)); + /* Detect a wide host of Chinese controllers that aren't CSR. * * Known fake bcdDevices: 0x0100, 0x0134, 0x1915, 0x2520, 0x7558, 0x8891 From 549b46f8130effccf168293270bb3b1d5da529cc Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 29 Nov 2022 12:54:13 -0800 Subject: [PATCH 1236/1477] Bluetooth: Fix crash when replugging CSR fake controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b5ca338751ad4783ec8d37b5d99c3e37b7813e59 upstream. 
It seems fake CSR 5.0 clones can cause the suspend notifier to be registered twice causing the following kernel panic: [ 71.986122] Call Trace: [ 71.986124] [ 71.986125] blocking_notifier_chain_register+0x33/0x60 [ 71.986130] hci_register_dev+0x316/0x3d0 [bluetooth 99b5497ea3d09708fa1366c1dc03288bf3cca8da] [ 71.986154] btusb_probe+0x979/0xd85 [btusb e1e0605a4f4c01984a4b9c8ac58c3666ae287477] [ 71.986159] ? __pm_runtime_set_status+0x1a9/0x300 [ 71.986162] ? ktime_get_mono_fast_ns+0x3e/0x90 [ 71.986167] usb_probe_interface+0xe3/0x2b0 [ 71.986171] really_probe+0xdb/0x380 [ 71.986174] ? pm_runtime_barrier+0x54/0x90 [ 71.986177] __driver_probe_device+0x78/0x170 [ 71.986180] driver_probe_device+0x1f/0x90 [ 71.986183] __device_attach_driver+0x89/0x110 [ 71.986186] ? driver_allows_async_probing+0x70/0x70 [ 71.986189] bus_for_each_drv+0x8c/0xe0 [ 71.986192] __device_attach+0xb2/0x1e0 [ 71.986195] bus_probe_device+0x92/0xb0 [ 71.986198] device_add+0x422/0x9a0 [ 71.986201] ? sysfs_merge_group+0xd4/0x110 [ 71.986205] usb_set_configuration+0x57a/0x820 [ 71.986208] usb_generic_driver_probe+0x4f/0x70 [ 71.986211] usb_probe_device+0x3a/0x110 [ 71.986213] really_probe+0xdb/0x380 [ 71.986216] ? pm_runtime_barrier+0x54/0x90 [ 71.986219] __driver_probe_device+0x78/0x170 [ 71.986221] driver_probe_device+0x1f/0x90 [ 71.986224] __device_attach_driver+0x89/0x110 [ 71.986227] ? driver_allows_async_probing+0x70/0x70 [ 71.986230] bus_for_each_drv+0x8c/0xe0 [ 71.986232] __device_attach+0xb2/0x1e0 [ 71.986235] bus_probe_device+0x92/0xb0 [ 71.986237] device_add+0x422/0x9a0 [ 71.986239] ? _dev_info+0x7d/0x98 [ 71.986242] ? blake2s_update+0x4c/0xc0 [ 71.986246] usb_new_device.cold+0x148/0x36d [ 71.986250] hub_event+0xa8a/0x1910 [ 71.986255] process_one_work+0x1c4/0x380 [ 71.986259] worker_thread+0x51/0x390 [ 71.986262] ? rescuer_thread+0x3b0/0x3b0 [ 71.986264] kthread+0xdb/0x110 [ 71.986266] ? 
kthread_complete_and_exit+0x20/0x20 [ 71.986268] ret_from_fork+0x1f/0x30 [ 71.986273] [ 71.986274] ---[ end trace 0000000000000000 ]--- [ 71.986284] btusb: probe of 2-1.6:1.0 failed with error -17 Link: https://bugzilla.kernel.org/show_bug.cgi?id=216683 Cc: stable@vger.kernel.org Signed-off-by: Luiz Augusto von Dentz Tested-by: Leonardo Eugênio Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 866eb22432de..f8aab38ab595 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3799,7 +3799,8 @@ int hci_register_dev(struct hci_dev *hdev) hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); - if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + if (!hdev->suspend_notifier.notifier_call && + !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { hdev->suspend_notifier.notifier_call = hci_suspend_notifier; error = register_pm_notifier(&hdev->suspend_notifier); if (error) From 3cb78c39252ead0079506a0211abba6d74392e82 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 23 Nov 2022 10:08:33 +0100 Subject: [PATCH 1237/1477] KVM: s390: vsie: Fix the initialization of the epoch extension (epdx) field commit 0dd4cdccdab3d74bd86b868768a7dca216bcce7e upstream. We recently experienced some weird huge time jumps in nested guests when rebooting them in certain cases. After adding some debug code to the epoch handling in vsie.c (thanks to David Hildenbrand for the idea!), it was obvious that the "epdx" field (the multi-epoch extension) did not get set to 0xff in case the "epoch" field was negative. Seems like the code misses to copy the value from the epdx field from the guest to the shadow control block. By doing so, the weird time jumps are gone in our scenarios. 
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2140899 Fixes: 8fa1696ea781 ("KVM: s390: Multiple Epoch Facility support") Signed-off-by: Thomas Huth Reviewed-by: Christian Borntraeger Acked-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Reviewed-by: Janosch Frank Cc: stable@vger.kernel.org # 4.19+ Link: https://lore.kernel.org/r/20221123090833.292938-1-thuth@redhat.com Message-Id: <20221123090833.292938-1-thuth@redhat.com> Signed-off-by: Janosch Frank Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/vsie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 3fbf7081c000..ff58decfef5e 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -535,8 +535,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) scb_s->eca |= scb_o->eca & ECA_CEI; /* Epoch Extension */ - if (test_kvm_facility(vcpu->kvm, 139)) + if (test_kvm_facility(vcpu->kvm, 139)) { scb_s->ecd |= scb_o->ecd & ECD_MEF; + scb_s->epdx = scb_o->epdx; + } /* etoken */ if (test_kvm_facility(vcpu->kvm, 156)) From 007f561f599f4977e1e281a92db8c11ad78762d4 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 1 Dec 2022 12:53:41 -0500 Subject: [PATCH 1238/1477] drm/vmwgfx: Don't use screen objects when SEV is active commit 6e90293618ed476d6b11f82ce724efbb9e9a071b upstream. When SEV is enabled gmr's and mob's are explicitly disabled because the encrypted system memory can not be used by the hypervisor. The driver was disabling GMR's but the presentation code, which depends on GMR's, wasn't honoring it which lead to black screen on hosts with SEV enabled. Make sure screen objects presentation is not used when guest memory regions have been disabled to fix presentation on SEV enabled hosts. 
Fixes: 3b0d6458c705 ("drm/vmwgfx: Refuse DMA operation when SEV encryption is active") Cc: # v5.7+ Signed-off-by: Zack Rusin Reported-by: Nicholas Hunt Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20221201175341.491884-1-zack@kde.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 4bf0f5ec4fc2..2b6590344468 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -949,6 +949,10 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv) struct drm_device *dev = dev_priv->dev; int i, ret; + /* Screen objects won't work if GMR's aren't available */ + if (!dev_priv->has_gmr) + return -ENOSYS; + if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) { DRM_INFO("Not using screen objects," " missing cap SCREEN_OBJECT_2\n"); From 6a4da05acd062ae7774b6b19cef2b7d922902d36 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 30 Nov 2022 10:57:47 -0800 Subject: [PATCH 1239/1477] drm/shmem-helper: Remove errant put in error path commit 24013314be6ee4ee456114a671e9fa3461323de8 upstream. drm_gem_shmem_mmap() doesn't own this reference, resulting in the GEM object getting prematurely freed leading to a later use-after-free. 
Link: https://syzkaller.appspot.com/bug?extid=c8ae65286134dd1b800d Reported-by: syzbot+c8ae65286134dd1b800d@syzkaller.appspotmail.com Fixes: 2194a63a818d ("drm: Add library for shmem backed GEM objects") Cc: stable@vger.kernel.org Signed-off-by: Rob Clark Reviewed-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20221130185748.357410-2-robdclark@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_gem_shmem_helper.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index cfacce0418a4..8b97e25b8b0c 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -616,10 +616,8 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) shmem = to_drm_gem_shmem_obj(obj); ret = drm_gem_shmem_get_pages(shmem); - if (ret) { - drm_gem_vm_close(vma); + if (ret) return ret; - } vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); From 11e95d85c3c99a226cc7003c787e31a02ee5b2ba Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 30 Nov 2022 10:57:48 -0800 Subject: [PATCH 1240/1477] drm/shmem-helper: Avoid vm_open error paths commit 09bf649a74573cb596e211418a4f8008f265c5a9 upstream. vm_open() is not allowed to fail. Fortunately we are guaranteed that the pages are already pinned, thanks to the initial mmap which is now being cloned into a forked process, and only need to increment the refcnt. So just increment it directly. Previously if a signal was delivered at the wrong time to the forking process, the mutex_lock_interruptible() could fail resulting in the pages_use_count not being incremented. 
Fixes: 2194a63a818d ("drm: Add library for shmem backed GEM objects") Cc: stable@vger.kernel.org Signed-off-by: Rob Clark Reviewed-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20221130185748.357410-3-robdclark@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_gem_shmem_helper.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 8b97e25b8b0c..c56656a95cf9 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -563,12 +563,20 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - int ret; WARN_ON(shmem->base.import_attach); - ret = drm_gem_shmem_get_pages(shmem); - WARN_ON_ONCE(ret != 0); + mutex_lock(&shmem->pages_lock); + + /* + * We should have already pinned the pages when the buffer was first + * mmap'd, vm_open() just grabs an additional reference for the new + * mm the vma is getting copied into (ie. on fork()). + */ + if (!WARN_ON_ONCE(!shmem->pages_use_count)) + shmem->pages_use_count++; + + mutex_unlock(&shmem->pages_lock); drm_gem_vm_open(vma); } From 4dde75945a9ca23b09dbdcafbbdee9dd5dd953af Mon Sep 17 00:00:00 2001 From: Ankit Patel Date: Tue, 22 Nov 2022 15:35:20 +0800 Subject: [PATCH 1241/1477] HID: usbhid: Add ALWAYS_POLL quirk for some mice commit f6d910a89a2391e5ce1f275d205023880a33d3f8 upstream. Some additional USB mouse devices are needing ALWAYS_POLL quirk without which they disconnect and reconnect every 60s. Add below devices to the known quirk list. 
CHERRY VID 0x046a, PID 0x000c MICROSOFT VID 0x045e, PID 0x0783 PRIMAX VID 0x0461, PID 0x4e2a Signed-off-by: Ankit Patel Signed-off-by: Haotien Hsu Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-quirks.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 3350a41d7dce..70a693f8f034 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -257,6 +257,7 @@ #define USB_DEVICE_ID_CH_AXIS_295 0x001c #define USB_VENDOR_ID_CHERRY 0x046a +#define USB_DEVICE_ID_CHERRY_MOUSE_000C 0x000c #define USB_DEVICE_ID_CHERRY_CYMOTION 0x0023 #define USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR 0x0027 @@ -874,6 +875,7 @@ #define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd #define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb #define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0 +#define USB_DEVICE_ID_MS_MOUSE_0783 0x0783 #define USB_VENDOR_ID_MOJO 0x8282 #define USB_DEVICE_ID_RETRO_ADAPTER 0x3201 @@ -1302,6 +1304,7 @@ #define USB_VENDOR_ID_PRIMAX 0x0461 #define USB_DEVICE_ID_PRIMAX_MOUSE_4D22 0x4d22 +#define USB_DEVICE_ID_PRIMAX_MOUSE_4E2A 0x4e2a #define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05 #define USB_DEVICE_ID_PRIMAX_REZEL 0x4e72 #define USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F 0x4d0f diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 4a8014e9a511..1efde40e5136 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -54,6 +54,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_MOUSE_000C), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS }, { 
HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS }, @@ -122,6 +123,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_MOUSE_0783), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS }, @@ -146,6 +148,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4D22), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4E2A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D65), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4E22), HID_QUIRK_ALWAYS_POLL }, From 959a23a4d1118ed3d557b297f7783537aa63a5a4 Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Fri, 11 Nov 2022 15:55:11 +0300 Subject: [PATCH 1242/1477] HID: hid-lg4ff: Add check for 
empty lbuf commit d180b6496143cd360c5d5f58ae4b9a8229c1f344 upstream. If an empty buf is received, lbuf is also empty. So lbuf is accessed by index -1. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: f31a2de3fe36 ("HID: hid-lg4ff: Allow switching of Logitech gaming wheels between compatibility modes") Signed-off-by: Anastasia Belova Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-lg4ff.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index 5e6a0cef2a06..e3fcf1353fb3 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -872,6 +872,12 @@ static ssize_t lg4ff_alternate_modes_store(struct device *dev, struct device_att return -ENOMEM; i = strlen(lbuf); + + if (i == 0) { + kfree(lbuf); + return -EINVAL; + } + if (lbuf[i-1] == '\n') { if (i == 1) { kfree(lbuf); From bc03f809da78fc79e4aee132d4e5c6a2b3aeec73 Mon Sep 17 00:00:00 2001 From: ZhangPeng Date: Wed, 16 Nov 2022 07:14:28 +0000 Subject: [PATCH 1243/1477] HID: core: fix shift-out-of-bounds in hid_report_raw_event commit ec61b41918587be530398b0d1c9a0d16619397e5 upstream. Syzbot reported shift-out-of-bounds in hid_report_raw_event. microsoft 0003:045E:07DA.0001: hid_field_extract() called with n (128) > 32! 
(swapper/0) ====================================================================== UBSAN: shift-out-of-bounds in drivers/hid/hid-core.c:1323:20 shift exponent 127 is too large for 32-bit type 'int' CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.1.0-rc4-syzkaller-00159-g4bbf3422df78 #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e3/0x2cb lib/dump_stack.c:106 ubsan_epilogue lib/ubsan.c:151 [inline] __ubsan_handle_shift_out_of_bounds+0x3a6/0x420 lib/ubsan.c:322 snto32 drivers/hid/hid-core.c:1323 [inline] hid_input_fetch_field drivers/hid/hid-core.c:1572 [inline] hid_process_report drivers/hid/hid-core.c:1665 [inline] hid_report_raw_event+0xd56/0x18b0 drivers/hid/hid-core.c:1998 hid_input_report+0x408/0x4f0 drivers/hid/hid-core.c:2066 hid_irq_in+0x459/0x690 drivers/hid/usbhid/hid-core.c:284 __usb_hcd_giveback_urb+0x369/0x530 drivers/usb/core/hcd.c:1671 dummy_timer+0x86b/0x3110 drivers/usb/gadget/udc/dummy_hcd.c:1988 call_timer_fn+0xf5/0x210 kernel/time/timer.c:1474 expire_timers kernel/time/timer.c:1519 [inline] __run_timers+0x76a/0x980 kernel/time/timer.c:1790 run_timer_softirq+0x63/0xf0 kernel/time/timer.c:1803 __do_softirq+0x277/0x75b kernel/softirq.c:571 __irq_exit_rcu+0xec/0x170 kernel/softirq.c:650 irq_exit_rcu+0x5/0x20 kernel/softirq.c:662 sysvec_apic_timer_interrupt+0x91/0xb0 arch/x86/kernel/apic/apic.c:1107 ====================================================================== If the size of the integer (unsigned n) is bigger than 32 in snto32(), shift exponent will be too large for 32-bit type 'int', resulting in a shift-out-of-bounds bug. Fix this by adding a check on the size of the integer (unsigned n) in snto32(). To add support for n greater than 32 bits, set n to 32, if n is greater than 32. 
Reported-by: syzbot+8b1641d2f14732407e23@syzkaller.appspotmail.com Fixes: dde5845a529f ("[PATCH] Generic HID layer - code split") Signed-off-by: ZhangPeng Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 5550c943f985..eaaf732f0630 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1310,6 +1310,9 @@ static s32 snto32(__u32 value, unsigned n) if (!value || !n) return 0; + if (n > 32) + n = 32; + switch (n) { case 8: return ((__s8)value); case 16: return ((__s16)value); From c42221efb1159d6a3c89e96685ee38acdce86b6f Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 6 Dec 2022 21:12:59 +0100 Subject: [PATCH 1244/1477] can: af_can: fix NULL pointer dereference in can_rcv_filter commit 0acc442309a0a1b01bcdaa135e56e6398a49439c upstream. Analogue to commit 8aa59e355949 ("can: af_can: fix NULL pointer dereference in can_rx_register()") we need to check for a missing initialization of ml_priv in the receive path of CAN frames. Since commit 4e096a18867a ("net: introduce CAN specific pointer in the struct net_device") the check for dev->type to be ARPHRD_CAN is not sufficient anymore since bonding or tun netdevices claim to be CAN devices but do not initialize ml_priv accordingly. 
Fixes: 4e096a18867a ("net: introduce CAN specific pointer in the struct net_device") Reported-by: syzbot+2d7f58292cb5b29eb5ad@syzkaller.appspotmail.com Reported-by: Wei Chen Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20221206201259.3028-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/af_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index cf554e855521..79f24c6f43c8 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -680,7 +680,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || skb->len != CAN_MTU)) { pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", dev->type, skb->len); goto free_skb; @@ -706,7 +706,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || skb->len != CANFD_MTU)) { pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", dev->type, skb->len); goto free_skb; From fccee93eb20d72f5390432ecea7f8c16af88c850 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 1 Sep 2022 18:41:31 +0800 Subject: [PATCH 1245/1477] mm/hugetlb: fix races when looking up a CONT-PTE/PMD size hugetlb page commit fac35ba763ed07ba93154c95ffc0c4a55023707f upstream. On some architectures (like ARM64), it can support CONT-PTE/PMD size hugetlb, which means it can support not only PMD/PUD size hugetlb (2M and 1G), but also CONT-PTE/PMD size(64K and 32M) if a 4K page size specified. 
So when looking up a CONT-PTE size hugetlb page by follow_page(), it will use pte_offset_map_lock() to get the pte entry lock for the CONT-PTE size hugetlb in follow_page_pte(). However this pte entry lock is incorrect for the CONT-PTE size hugetlb, since we should use huge_pte_lock() to get the correct lock, which is mm->page_table_lock. That means the pte entry of the CONT-PTE size hugetlb under current pte lock is unstable in follow_page_pte(), we can continue to migrate or poison the pte entry of the CONT-PTE size hugetlb, which can cause some potential race issues, even though they are under the 'pte lock'. For example, suppose thread A is trying to look up a CONT-PTE size hugetlb page by move_pages() syscall under the lock, however another thread B can migrate the CONT-PTE hugetlb page at the same time, which will cause thread A to get an incorrect page, if thread A also wants to do page migration, then data inconsistency error occurs. Moreover we have the same issue for CONT-PMD size hugetlb in follow_huge_pmd(). To fix above issues, rename the follow_huge_pmd() as follow_huge_pmd_pte() to handle PMD and PTE level size hugetlb, which uses huge_pte_lock() to get the correct pte entry lock to make the pte entry stable. Mike said: Support for CONT_PMD/_PTE was added with bb9dd3df8ee9 ("arm64: hugetlb: refactor find_num_contig()"). Patch series "Support for contiguous pte hugepages", v4. However, I do not believe these code paths were executed until migration support was added with 5480280d3f2d ("arm64/mm: enable HugeTLB migration for contiguous bit HugeTLB pages") I would go with 5480280d3f2d for the Fixes: target.
Link: https://lkml.kernel.org/r/635f43bdd85ac2615a58405da82b4d33c6e5eb05.1662017562.git.baolin.wang@linux.alibaba.com Fixes: 5480280d3f2d ("arm64/mm: enable HugeTLB migration for contiguous bit HugeTLB pages") Signed-off-by: Baolin Wang Suggested-by: Mike Kravetz Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: Greg Kroah-Hartman --- include/linux/hugetlb.h | 8 ++++---- mm/gup.c | 14 +++++++++++++- mm/hugetlb.c | 27 +++++++++++++-------------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b9fbb6d4150e..955b19dc28a8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -174,8 +174,8 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pd(struct vm_area_struct *vma, unsigned long address, hugepd_t hpd, int flags, int pdshift); -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int flags); +struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, + int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, @@ -261,8 +261,8 @@ static inline struct page *follow_huge_pd(struct vm_area_struct *vma, return NULL; } -static inline struct page *follow_huge_pmd(struct mm_struct *mm, - unsigned long address, pmd_t *pmd, int flags) +static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, + unsigned long address, int flags) { return NULL; } diff --git a/mm/gup.c b/mm/gup.c index bd2c011c7ca9..6d5e4fd55d32 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -405,6 +405,18 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == (FOLL_PIN | FOLL_GET))) return 
ERR_PTR(-EINVAL); + + /* + * Considering PTE level hugetlb, like continuous-PTE hugetlb on + * ARM64 architecture. + */ + if (is_vm_hugetlb_page(vma)) { + page = follow_huge_pmd_pte(vma, address, flags); + if (page) + return page; + return no_page_table(vma, flags); + } + retry: if (unlikely(pmd_bad(*pmd))) return no_page_table(vma, flags); @@ -560,7 +572,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, if (pmd_none(pmdval)) return no_page_table(vma, flags); if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) { - page = follow_huge_pmd(mm, address, pmd, flags); + page = follow_huge_pmd_pte(vma, address, flags); if (page) return page; return no_page_table(vma, flags); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d8c63d79af20..3499b3803384 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5585,12 +5585,13 @@ follow_huge_pd(struct vm_area_struct *vma, } struct page * __weak -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int flags) +follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags) { + struct hstate *h = hstate_vma(vma); + struct mm_struct *mm = vma->vm_mm; struct page *page = NULL; spinlock_t *ptl; - pte_t pte; + pte_t *ptep, pte; /* FOLL_GET and FOLL_PIN are mutually exclusive. */ if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == @@ -5598,17 +5599,15 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; retry: - ptl = pmd_lockptr(mm, pmd); - spin_lock(ptl); - /* - * make sure that the address range covered by this pmd is not - * unmapped from other threads. 
- */ - if (!pmd_huge(*pmd)) - goto out; - pte = huge_ptep_get((pte_t *)pmd); + ptep = huge_pte_offset(mm, address, huge_page_size(h)); + if (!ptep) + return NULL; + + ptl = huge_pte_lock(h, mm, ptep); + pte = huge_ptep_get(ptep); if (pte_present(pte)) { - page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); + page = pte_page(pte) + + ((address & ~huge_page_mask(h)) >> PAGE_SHIFT); /* * try_grab_page() should always succeed here, because: a) we * hold the pmd (ptl) lock, and b) we've just checked that the @@ -5624,7 +5623,7 @@ retry: } else { if (is_hugetlb_entry_migration(pte)) { spin_unlock(ptl); - __migration_entry_wait(mm, (pte_t *)pmd, ptl); + __migration_entry_wait(mm, ptep, ptl); goto retry; } /* From 5e26531d8113da79dbc03b92d84ea47fa87683af Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Mar 2021 12:21:40 +0000 Subject: [PATCH 1246/1477] rtc: cmos: Disable irq around direct invocation of cmos_interrupt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 13be2efc390acd2a46a69a359f6efc00ca434599 ] As previously noted in commit 66e4f4a9cc38 ("rtc: cmos: Use spin_lock_irqsave() in cmos_interrupt()"): <4>[ 254.192378] WARNING: inconsistent lock state <4>[ 254.192384] 5.12.0-rc1-CI-CI_DRM_9834+ #1 Not tainted <4>[ 254.192396] -------------------------------- <4>[ 254.192400] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. 
<4>[ 254.192409] rtcwake/5309 [HC0[0]:SC0[0]:HE1:SE1] takes: <4>[ 254.192429] ffffffff8263c5f8 (rtc_lock){?...}-{2:2}, at: cmos_interrupt+0x18/0x100 <4>[ 254.192481] {IN-HARDIRQ-W} state was registered at: <4>[ 254.192488] lock_acquire+0xd1/0x3d0 <4>[ 254.192504] _raw_spin_lock+0x2a/0x40 <4>[ 254.192519] cmos_interrupt+0x18/0x100 <4>[ 254.192536] rtc_handler+0x1f/0xc0 <4>[ 254.192553] acpi_ev_fixed_event_detect+0x109/0x13c <4>[ 254.192574] acpi_ev_sci_xrupt_handler+0xb/0x28 <4>[ 254.192596] acpi_irq+0x13/0x30 <4>[ 254.192620] __handle_irq_event_percpu+0x43/0x2c0 <4>[ 254.192641] handle_irq_event_percpu+0x2b/0x70 <4>[ 254.192661] handle_irq_event+0x2f/0x50 <4>[ 254.192680] handle_fasteoi_irq+0x9e/0x150 <4>[ 254.192693] __common_interrupt+0x76/0x140 <4>[ 254.192715] common_interrupt+0x96/0xc0 <4>[ 254.192732] asm_common_interrupt+0x1e/0x40 <4>[ 254.192750] _raw_spin_unlock_irqrestore+0x38/0x60 <4>[ 254.192767] resume_irqs+0xba/0xf0 <4>[ 254.192786] dpm_resume_noirq+0x245/0x3d0 <4>[ 254.192811] suspend_devices_and_enter+0x230/0xaa0 <4>[ 254.192835] pm_suspend.cold.8+0x301/0x34a <4>[ 254.192859] state_store+0x7b/0xe0 <4>[ 254.192879] kernfs_fop_write_iter+0x11d/0x1c0 <4>[ 254.192899] new_sync_write+0x11d/0x1b0 <4>[ 254.192916] vfs_write+0x265/0x390 <4>[ 254.192933] ksys_write+0x5a/0xd0 <4>[ 254.192949] do_syscall_64+0x33/0x80 <4>[ 254.192965] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 254.192986] irq event stamp: 43775 <4>[ 254.192994] hardirqs last enabled at (43775): [] asm_sysvec_apic_timer_interrupt+0x12/0x20 <4>[ 254.193023] hardirqs last disabled at (43774): [] sysvec_apic_timer_interrupt+0xa/0xb0 <4>[ 254.193049] softirqs last enabled at (42548): [] __do_softirq+0x342/0x48e <4>[ 254.193074] softirqs last disabled at (42543): [] irq_exit_rcu+0xad/0xd0 <4>[ 254.193101] other info that might help us debug this: <4>[ 254.193107] Possible unsafe locking scenario: <4>[ 254.193112] CPU0 <4>[ 254.193117] ---- <4>[ 254.193121] lock(rtc_lock); <4>[ 254.193137] <4>[ 
254.193142] lock(rtc_lock); <4>[ 254.193156] *** DEADLOCK *** <4>[ 254.193161] 6 locks held by rtcwake/5309: <4>[ 254.193174] #0: ffff888104861430 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x5a/0xd0 <4>[ 254.193232] #1: ffff88810f823288 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0xe7/0x1c0 <4>[ 254.193282] #2: ffff888100cef3c0 (kn->active#285 <7>[ 254.192706] i915 0000:00:02.0: [drm:intel_modeset_setup_hw_state [i915]] [CRTC:51:pipe A] hw state readout: disabled <4>[ 254.193307] ){.+.+}-{0:0}, at: kernfs_fop_write_iter+0xf0/0x1c0 <4>[ 254.193333] #3: ffffffff82649fa8 (system_transition_mutex){+.+.}-{3:3}, at: pm_suspend.cold.8+0xce/0x34a <4>[ 254.193387] #4: ffffffff827a2108 (acpi_scan_lock){+.+.}-{3:3}, at: acpi_suspend_begin+0x47/0x70 <4>[ 254.193433] #5: ffff8881019ea178 (&dev->mutex){....}-{3:3}, at: device_resume+0x68/0x1e0 <4>[ 254.193485] stack backtrace: <4>[ 254.193492] CPU: 1 PID: 5309 Comm: rtcwake Not tainted 5.12.0-rc1-CI-CI_DRM_9834+ #1 <4>[ 254.193514] Hardware name: Google Soraka/Soraka, BIOS MrChromebox-4.10 08/25/2019 <4>[ 254.193524] Call Trace: <4>[ 254.193536] dump_stack+0x7f/0xad <4>[ 254.193567] mark_lock.part.47+0x8ca/0xce0 <4>[ 254.193604] __lock_acquire+0x39b/0x2590 <4>[ 254.193626] ? asm_sysvec_apic_timer_interrupt+0x12/0x20 <4>[ 254.193660] lock_acquire+0xd1/0x3d0 <4>[ 254.193677] ? cmos_interrupt+0x18/0x100 <4>[ 254.193716] _raw_spin_lock+0x2a/0x40 <4>[ 254.193735] ? cmos_interrupt+0x18/0x100 <4>[ 254.193758] cmos_interrupt+0x18/0x100 <4>[ 254.193785] cmos_resume+0x2ac/0x2d0 <4>[ 254.193813] ? acpi_pm_set_device_wakeup+0x1f/0x110 <4>[ 254.193842] ? pnp_bus_suspend+0x10/0x10 <4>[ 254.193864] pnp_bus_resume+0x5e/0x90 <4>[ 254.193885] dpm_run_callback+0x5f/0x240 <4>[ 254.193914] device_resume+0xb2/0x1e0 <4>[ 254.193942] ? 
pm_dev_err+0x25/0x25 <4>[ 254.193974] dpm_resume+0xea/0x3f0 <4>[ 254.194005] dpm_resume_end+0x8/0x10 <4>[ 254.194030] suspend_devices_and_enter+0x29b/0xaa0 <4>[ 254.194066] pm_suspend.cold.8+0x301/0x34a <4>[ 254.194094] state_store+0x7b/0xe0 <4>[ 254.194124] kernfs_fop_write_iter+0x11d/0x1c0 <4>[ 254.194151] new_sync_write+0x11d/0x1b0 <4>[ 254.194183] vfs_write+0x265/0x390 <4>[ 254.194207] ksys_write+0x5a/0xd0 <4>[ 254.194232] do_syscall_64+0x33/0x80 <4>[ 254.194251] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 254.194274] RIP: 0033:0x7f07d79691e7 <4>[ 254.194293] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 <4>[ 254.194312] RSP: 002b:00007ffd9cc2c768 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 <4>[ 254.194337] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f07d79691e7 <4>[ 254.194352] RDX: 0000000000000004 RSI: 0000556ebfc63590 RDI: 000000000000000b <4>[ 254.194366] RBP: 0000556ebfc63590 R08: 0000000000000000 R09: 0000000000000004 <4>[ 254.194379] R10: 0000556ebf0ec2a6 R11: 0000000000000246 R12: 0000000000000004 which breaks S3-resume on fi-kbl-soraka presumably as that's slow enough to trigger the alarm during the suspend. 
Fixes: 6950d046eb6e ("rtc: cmos: Replace spin_lock_irqsave with spin_lock in hard IRQ") References: 66e4f4a9cc38 ("rtc: cmos: Use spin_lock_irqsave() in cmos_interrupt()"): Signed-off-by: Chris Wilson Cc: Xiaofei Tan Cc: Alexandre Belloni Cc: Alessandro Zummo Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210305122140.28774-1-chris@chris-wilson.co.uk Signed-off-by: Sasha Levin --- drivers/rtc/rtc-cmos.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 21f2bdd025b6..d4f6c4dd42c4 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -1111,7 +1111,9 @@ static void cmos_check_wkalrm(struct device *dev) * ACK the rtc irq here */ if (t_now >= cmos->alarm_expires && cmos_use_acpi_alarm()) { + local_irq_disable(); cmos_interrupt(0, (void *)cmos->rtc); + local_irq_enable(); return; } From 5c432383b6870c4c6e45ff9b02d89c39013f2e9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Sun, 20 Feb 2022 10:04:03 +0100 Subject: [PATCH 1247/1477] rtc: mc146818-lib: fix locking in mc146818_set_time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 811f5559270f25c34c338d6eaa2ece2544c3d3bd ] In mc146818_set_time(), CMOS_READ(RTC_CONTROL) was performed without the rtc_lock taken, which is required for CMOS accesses. Fix this. Nothing in kernel modifies RTC_DM_BINARY, so a separate critical section is allowed here. 
Fixes: dcf257e92622 ("rtc: mc146818: Reduce spinlock section in mc146818_set_time()") Signed-off-by: Mateusz Jończyk Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220220090403.153928-1-mat.jonczyk@o2.pl Signed-off-by: Sasha Levin --- drivers/rtc/rtc-mc146818-lib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 1ca866461d10..3783aaf9dd5a 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -283,8 +283,10 @@ int mc146818_set_time(struct rtc_time *time) if (yrs >= 100) yrs -= 100; - if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) - || RTC_ALWAYS_BCD) { + spin_lock_irqsave(&rtc_lock, flags); + save_control = CMOS_READ(RTC_CONTROL); + spin_unlock_irqrestore(&rtc_lock, flags); + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { sec = bin2bcd(sec); min = bin2bcd(min); hrs = bin2bcd(hrs); From cb283cca1ddc23297f61366a4363ce2b9e6f6331 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 11 Jan 2022 10:19:22 +0300 Subject: [PATCH 1248/1477] rtc: mc146818-lib: fix signedness bug in mc146818_get_time() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7372971c1be5b7d4fdd8ad237798bdc1d1d54162 ] The mc146818_get_time() function returns zero on success or a negative error code on failure. It needs to be type int.
Fixes: d35786b3a28d ("rtc: mc146818-lib: change return values of mc146818_get_time()") Signed-off-by: Dan Carpenter Reviewed-by: Mateusz Jończyk Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220111071922.GE11243@kili Signed-off-by: Sasha Levin --- drivers/rtc/rtc-mc146818-lib.c | 2 +- include/linux/mc146818rtc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 3783aaf9dd5a..347655d24b5d 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -103,7 +103,7 @@ bool mc146818_does_rtc_work(void) } EXPORT_SYMBOL_GPL(mc146818_does_rtc_work); -unsigned int mc146818_get_time(struct rtc_time *time) +int mc146818_get_time(struct rtc_time *time) { unsigned char ctrl; unsigned long flags; diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index fb042e0e7d76..b0da04fe087b 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -126,7 +126,7 @@ struct cmos_rtc_board_info { #endif /* ARCH_RTC_LOCATION */ bool mc146818_does_rtc_work(void); -unsigned int mc146818_get_time(struct rtc_time *time); +int mc146818_get_time(struct rtc_time *time); int mc146818_set_time(struct rtc_time *time); bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), From a0418d0a6b2d38e14296ad02ab13fd2f960212d2 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 24 Nov 2022 13:04:37 +0100 Subject: [PATCH 1249/1477] netfilter: nft_set_pipapo: Actually validate intervals in fields after the first one [ Upstream commit 97d4d394b58777f7056ebba8ffdb4002d0563259 ] Embarrassingly, nft_pipapo_insert() checked for interval validity in the first field only. The start_p and end_p pointers were reset to key data from the first field at every iteration of the loop which was supposed to go over the set fields. 
Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: Pablo Neira Ayuso Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_pipapo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 949da87dbb06..30cf0673d6c1 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1162,6 +1162,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, struct nft_pipapo_match *m = priv->clone; u8 genmask = nft_genmask_next(net); struct nft_pipapo_field *f; + const u8 *start_p, *end_p; int i, bsize_max, err = 0; if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) @@ -1202,9 +1203,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, } /* Validate */ + start_p = start; + end_p = end; nft_pipapo_for_each_field(f, i, m) { - const u8 *start_p = start, *end_p = end; - if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX) return -ENOSPC; From 27c71825ffc4010dceaa935b673feff809f704d5 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sun, 20 Nov 2022 15:50:46 +0800 Subject: [PATCH 1250/1477] ieee802154: cc2520: Fix error return code in cc2520_hw_init() [ Upstream commit 4d002d6a2a00ac1c433899bd7625c6400a74cfba ] In cc2520_hw_init(), if oscillator start failed, the error code should be returned. 
Fixes: 0da6bc8cc341 ("ieee802154: cc2520: adds driver for TI CC2520 radio") Signed-off-by: Ziyang Xuan Link: https://lore.kernel.org/r/20221120075046.2213633-1-william.xuanziyang@huawei.com Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin --- drivers/net/ieee802154/cc2520.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index 4517517215f2..a8369bfa4050 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -970,7 +970,7 @@ static int cc2520_hw_init(struct cc2520_private *priv) if (timeout-- <= 0) { dev_err(&priv->spi->dev, "oscillator start failed!\n"); - return ret; + return -ETIMEDOUT; } udelay(1); } while (!(status & CC2520_STATUS_XOSC32M_STABLE)); From 0a8e66e375736ea12c11f0ef238ba2a8efec460b Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 21 Nov 2022 01:22:01 +0100 Subject: [PATCH 1251/1477] ca8210: Fix crash by zero initializing data [ Upstream commit 1e24c54da257ab93cff5826be8a793b014a5dc9c ] The struct cas_control embeds multiple generic SPI structures and we have to make sure these structures are initialized to default values. This driver does not set all attributes. Previously, when kmalloc was used, some attributes were not initialized and contained random data, which caused random crashes at bootup.
Fixes: ded845a781a5 ("ieee802154: Add CA8210 IEEE 802.15.4 device driver") Signed-off-by: Hauke Mehrtens Link: https://lore.kernel.org/r/20221121002201.1339636-1-hauke@hauke-m.de Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin --- drivers/net/ieee802154/ca8210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index fd9f33c833fa..95ef3b6f98dd 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -926,7 +926,7 @@ static int ca8210_spi_transfer( dev_dbg(&spi->dev, "%s called\n", __func__); - cas_ctl = kmalloc(sizeof(*cas_ctl), GFP_ATOMIC); + cas_ctl = kzalloc(sizeof(*cas_ctl), GFP_ATOMIC); if (!cas_ctl) return -ENOMEM; From b46e8c50c38648617eddf07f8a031acfce6f3d1f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2022 10:58:53 +0100 Subject: [PATCH 1252/1477] netfilter: ctnetlink: fix compilation warning after data race fixes in ct mark [ Upstream commit 1feeae071507ad65cf9f462a1bdd543a4bf89e71 ] All warnings (new ones prefixed by >>): net/netfilter/nf_conntrack_netlink.c: In function '__ctnetlink_glue_build': >> net/netfilter/nf_conntrack_netlink.c:2674:13: warning: unused variable 'mark' [-Wunused-variable] 2674 | u32 mark; | ^~~~ Fixes: 52d1aa8b8249 ("netfilter: conntrack: Fix data-races around ct mark") Reported-by: kernel test robot Tested-by: Ivan Babrou Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_netlink.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c402283e7545..2efdc50f978b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -317,8 +317,13 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_MARK -static int ctnetlink_dump_mark(struct sk_buff *skb, u32 mark) +static int ctnetlink_dump_mark(struct sk_buff *skb, 
const struct nf_conn *ct) { + u32 mark = READ_ONCE(ct->mark); + + if (!mark) + return 0; + if (nla_put_be32(skb, CTA_MARK, htonl(mark))) goto nla_put_failure; return 0; @@ -532,7 +537,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) { if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_mark(skb, READ_ONCE(ct->mark)) < 0 || + ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || @@ -711,7 +716,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) struct sk_buff *skb; unsigned int type; unsigned int flags = 0, group; - u32 mark; int err; if (events & (1 << IPCT_DESTROY)) { @@ -812,9 +816,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) } #ifdef CONFIG_NF_CONNTRACK_MARK - mark = READ_ONCE(ct->mark); - if ((events & (1 << IPCT_MARK) || mark) && - ctnetlink_dump_mark(skb, mark) < 0) + if (events & (1 << IPCT_MARK) && + ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif nlmsg_end(skb, nlh); @@ -2671,7 +2674,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; - u32 mark; zone = nf_ct_zone(ct); @@ -2729,8 +2731,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK - mark = READ_ONCE(ct->mark); - if (mark && ctnetlink_dump_mark(skb, mark) < 0) + if (ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) From b9aca69a6c8282f1ee6ef9d8ddfa9bf0ff1b89a0 Mon Sep 17 00:00:00 2001 From: Qiqi Zhang Date: Fri, 25 Nov 2022 18:45:58 +0800 Subject: [PATCH 1253/1477] drm/bridge: ti-sn65dsi86: Fix output polarity setting bug [ Upstream commit 8c115864501fc09932cdfec53d9ec1cde82b4a28 ] According to the description in ti-sn65dsi86's 
datasheet: CHA_HSYNC_POLARITY: 0 = Active High Pulse. Synchronization signal is high for the sync pulse width. (default) 1 = Active Low Pulse. Synchronization signal is low for the sync pulse width. CHA_VSYNC_POLARITY: 0 = Active High Pulse. Synchronization signal is high for the sync pulse width. (Default) 1 = Active Low Pulse. Synchronization signal is low for the sync pulse width. We should only set these bits when the polarity is negative. Fixes: a095f15c00e2 ("drm/bridge: add support for sn65dsi86 bridge driver") Signed-off-by: Qiqi Zhang Reviewed-by: Douglas Anderson Tested-by: Douglas Anderson Reviewed-by: Tomi Valkeinen Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20221125104558.84616-1-eddy.zhang@rock-chips.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 1a58481037b3..77a447a3fb1d 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -621,9 +621,9 @@ static void ti_sn_bridge_set_video_timings(struct ti_sn_bridge *pdata) &pdata->bridge.encoder->crtc->state->adjusted_mode; u8 hsync_polarity = 0, vsync_polarity = 0; - if (mode->flags & DRM_MODE_FLAG_PHSYNC) + if (mode->flags & DRM_MODE_FLAG_NHSYNC) hsync_polarity = CHA_HSYNC_POLARITY; - if (mode->flags & DRM_MODE_FLAG_PVSYNC) + if (mode->flags & DRM_MODE_FLAG_NVSYNC) vsync_polarity = CHA_VSYNC_POLARITY; ti_sn_bridge_write_u16(pdata, SN_CHA_ACTIVE_LINE_LENGTH_LOW_REG, From 5ee6413d3dd972930af787b2c0c7aaeb379fa521 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Tue, 22 Nov 2022 20:35:08 +0800 Subject: [PATCH 1254/1477] gpio: amd8111: Fix PCI device reference count leak [ Upstream commit 45fecdb9f658d9c82960c98240bc0770ade19aca ] for_each_pci_dev() is implemented by pci_get_device(). 
The comment of pci_get_device() says that it will increase the reference count for the returned pci_dev and also decrease the reference count for the input pci_dev @from if it is not NULL. If we break for_each_pci_dev() loop with pdev not NULL, we need to call pci_dev_put() to decrease the reference count. Add the missing pci_dev_put() after the 'out' label. Since pci_dev_put() can handle NULL input parameter, there is no problem for the 'Device not found' branch. For the normal path, add pci_dev_put() in amd_gpio_exit(). Fixes: f942a7de047d ("gpio: add a driver for GPIO pins found on AMD-8111 south bridge chips") Signed-off-by: Xiongfeng Wang Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-amd8111.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/gpio-amd8111.c b/drivers/gpio/gpio-amd8111.c index fdcebe59510d..68d95051dd0e 100644 --- a/drivers/gpio/gpio-amd8111.c +++ b/drivers/gpio/gpio-amd8111.c @@ -231,7 +231,10 @@ found: ioport_unmap(gp.pm); goto out; } + return 0; + out: + pci_dev_put(pdev); return err; } @@ -239,6 +242,7 @@ static void __exit amd_gpio_exit(void) { gpiochip_remove(&gp.chip); ioport_unmap(gp.pm); + pci_dev_put(gp.pdev); } module_init(amd_gpio_init); From 6595c9208d97ff649eb3102a19bb6180abb955e7 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 28 Oct 2022 22:00:00 +0900 Subject: [PATCH 1255/1477] e1000e: Fix TX dispatch condition [ Upstream commit eed913f6919e253f35d454b2f115f2a4db2b741a ] e1000_xmit_frame is expected to stop the queue and dispatch frames to hardware if there is not sufficient space for the next frame in the buffer, but sometimes it failed to do so because the estimated maximum size of frame was wrong. As the consequence, the later invocation of e1000_xmit_frame failed with NETDEV_TX_BUSY, and the frame in the buffer remained forever, resulting in a watchdog failure. This change fixes the estimated size by making it match with the condition for NETDEV_TX_BUSY. 
Apparently, the old estimation failed to account for the following lines which determines the space requirement for not causing NETDEV_TX_BUSY: ``` /* reserve a descriptor for the offload context */ if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL)) count++; count++; count += DIV_ROUND_UP(len, adapter->tx_fifo_limit); ``` This issue was found when running http-stress02 test included in Linux Test Project 20220930 on QEMU with the following commandline: ``` qemu-system-x86_64 -M q35,accel=kvm -m 8G -smp 8 -drive if=virtio,format=raw,file=root.img,file.locking=on -device e1000e,netdev=netdev -netdev tap,script=ifup,downscript=no,id=netdev ``` Fixes: bc7f75fa9788 ("[E1000E]: New pci-express e1000 driver (currently for ICH9 devices only)") Signed-off-by: Akihiko Odaki Tested-by: Gurucharan G (A Contingent worker at Intel) Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index d0c4de023112..ae0c9aaab48d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5937,9 +5937,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, e1000_tx_queue(tx_ring, tx_flags, count); /* Make sure there is space in the ring for the next send. 
*/ e1000_maybe_stop_tx(tx_ring, - (MAX_SKB_FRAGS * + ((MAX_SKB_FRAGS + 1) * DIV_ROUND_UP(PAGE_SIZE, - adapter->tx_fifo_limit) + 2)); + adapter->tx_fifo_limit) + 4)); if (!netdev_xmit_more() || netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { From de918d9738c7aa6ad52a9ce04eae55a0f7a5c7a1 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 25 Nov 2022 22:30:31 +0900 Subject: [PATCH 1256/1477] igb: Allocate MSI-X vector when testing [ Upstream commit 28e96556baca7056d11d9fb3cdd0aba4483e00d8 ] Without this change, the interrupt test fail with MSI-X environment: $ sudo ethtool -t enp0s2 offline [ 43.921783] igb 0000:00:02.0: offline testing starting [ 44.855824] igb 0000:00:02.0 enp0s2: igb: enp0s2 NIC Link is Down [ 44.961249] igb 0000:00:02.0 enp0s2: igb: enp0s2 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX [ 51.272202] igb 0000:00:02.0: testing shared interrupt [ 56.996975] igb 0000:00:02.0 enp0s2: igb: enp0s2 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX The test result is FAIL The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 4 Loopback test (offline) 0 Link test (on/offline) 0 Here, "4" means an expected interrupt was not delivered. To fix this, route IRQs correctly to the first MSI-X vector by setting IVAR_MISC. Also, set bit 0 of EIMS so that the vector will not be masked. 
The interrupt test now runs properly with this change: $ sudo ethtool -t enp0s2 offline [ 42.762985] igb 0000:00:02.0: offline testing starting [ 50.141967] igb 0000:00:02.0: testing shared interrupt [ 56.163957] igb 0000:00:02.0 enp0s2: igb: enp0s2 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX The test result is PASS The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 0 Loopback test (offline) 0 Link test (on/offline) 0 Fixes: 4eefa8f01314 ("igb: add single vector msi-x testing to interrupt test") Signed-off-by: Akihiko Odaki Reviewed-by: Maciej Fijalkowski Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 28baf203459a..5e3b0a5843a8 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1413,6 +1413,8 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) *data = 1; return -1; } + wr32(E1000_IVAR_MISC, E1000_IVAR_VALID << 8); + wr32(E1000_EIMS, BIT(0)); } else if (adapter->flags & IGB_FLAG_HAS_MSI) { shared_int = false; if (request_irq(irq, From 6c788c0a250667089eebbd75be559c04876961b9 Mon Sep 17 00:00:00 2001 From: Guillaume BRUN Date: Wed, 16 Nov 2022 15:35:23 +0100 Subject: [PATCH 1257/1477] drm: bridge: dw_hdmi: fix preference of RGB modes over YUV420 [ Upstream commit d3d6b1bf85aefe0ebc0624574b3bb62f0693914c ] Cheap monitors sometimes advertise YUV modes they don't really have (HDMI specification mandates YUV support so even monitors without actual support will often wrongfully advertise it) which results in YUV matches and user forum complaints of a red tint to light colour display areas in common desktop environments. 
Moving the default RGB fall-back before YUV selection results in RGB mode matching in most cases, reducing complaints. Fixes: 6c3c719936da ("drm/bridge: synopsys: dw-hdmi: add bus format negociation") Signed-off-by: Guillaume BRUN Tested-by: Christian Hewitt Reviewed-by: Robert Foss Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20221116143523.2126-1-the.cheaterman@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 356c7d0bd035..2c3c743df950 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2609,6 +2609,9 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, * if supported. In any case the default RGB888 format is added */ + /* Default 8bit RGB fallback */ + output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24; + if (max_bpc >= 16 && info->bpc == 16) { if (info->color_formats & DRM_COLOR_FORMAT_YCRCB444) output_fmts[i++] = MEDIA_BUS_FMT_YUV16_1X48; @@ -2642,9 +2645,6 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, if (info->color_formats & DRM_COLOR_FORMAT_YCRCB444) output_fmts[i++] = MEDIA_BUS_FMT_YUV8_1X24; - /* Default 8bit RGB fallback */ - output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24; - *num_output_fmts = i; return output_fmts; From 575a6266f63dbb3b8eb1da03671451f0d81b8034 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sun, 27 Nov 2022 10:24:11 +0900 Subject: [PATCH 1258/1477] af_unix: Get user_ns from in_skb in unix_diag_get_exact(). [ Upstream commit b3abe42e94900bdd045c472f9c9be620ba5ce553 ] Wei Chen reported a NULL deref in sk_user_ns() [0][1], and Paolo diagnosed the root cause: in unix_diag_get_exact(), the newly allocated skb does not have sk. 
[2] We must get the user_ns from the NETLINK_CB(in_skb).sk and pass it to sk_diag_fill(). [0]: BUG: kernel NULL pointer dereference, address: 0000000000000270 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 12bbce067 P4D 12bbce067 PUD 12bc40067 PMD 0 Oops: 0000 [#1] PREEMPT SMP CPU: 0 PID: 27942 Comm: syz-executor.0 Not tainted 6.1.0-rc5-next-20221118 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014 RIP: 0010:sk_user_ns include/net/sock.h:920 [inline] RIP: 0010:sk_diag_dump_uid net/unix/diag.c:119 [inline] RIP: 0010:sk_diag_fill+0x77d/0x890 net/unix/diag.c:170 Code: 89 ef e8 66 d4 2d fd c7 44 24 40 00 00 00 00 49 8d 7c 24 18 e8 54 d7 2d fd 49 8b 5c 24 18 48 8d bb 70 02 00 00 e8 43 d7 2d fd <48> 8b 9b 70 02 00 00 48 8d 7b 10 e8 33 d7 2d fd 48 8b 5b 10 48 8d RSP: 0018:ffffc90000d67968 EFLAGS: 00010246 RAX: ffff88812badaa48 RBX: 0000000000000000 RCX: ffffffff840d481d RDX: 0000000000000465 RSI: 0000000000000000 RDI: 0000000000000270 RBP: ffffc90000d679a8 R08: 0000000000000277 R09: 0000000000000000 R10: 0001ffffffffffff R11: 0001c90000d679a8 R12: ffff88812ac03800 R13: ffff88812c87c400 R14: ffff88812ae42210 R15: ffff888103026940 FS: 00007f08b4e6f700(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000270 CR3: 000000012c58b000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: unix_diag_get_exact net/unix/diag.c:285 [inline] unix_diag_handler_dump+0x3f9/0x500 net/unix/diag.c:317 __sock_diag_cmd net/core/sock_diag.c:235 [inline] sock_diag_rcv_msg+0x237/0x250 net/core/sock_diag.c:266 netlink_rcv_skb+0x13e/0x250 net/netlink/af_netlink.c:2564 sock_diag_rcv+0x24/0x40 net/core/sock_diag.c:277 netlink_unicast_kernel net/netlink/af_netlink.c:1330 [inline] netlink_unicast+0x5e9/0x6b0 
net/netlink/af_netlink.c:1356 netlink_sendmsg+0x739/0x860 net/netlink/af_netlink.c:1932 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0x38f/0x500 net/socket.c:2476 ___sys_sendmsg net/socket.c:2530 [inline] __sys_sendmsg+0x197/0x230 net/socket.c:2559 __do_sys_sendmsg net/socket.c:2568 [inline] __se_sys_sendmsg net/socket.c:2566 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2566 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x4697f9 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f08b4e6ec48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000077bf80 RCX: 00000000004697f9 RDX: 0000000000000000 RSI: 00000000200001c0 RDI: 0000000000000003 RBP: 00000000004d29e9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000077bf80 R13: 0000000000000000 R14: 000000000077bf80 R15: 00007ffdb36bc6c0 Modules linked in: CR2: 0000000000000270 [1]: https://lore.kernel.org/netdev/CAO4mrfdvyjFpokhNsiwZiP-wpdSD0AStcJwfKcKQdAALQ9_2Qw@mail.gmail.com/ [2]: https://lore.kernel.org/netdev/e04315e7c90d9a75613f3993c2baf2d344eef7eb.camel@redhat.com/ Fixes: cae9910e7344 ("net: Add UNIX_DIAG_UID to Netlink UNIX socket diagnostics.") Reported-by: syzbot Reported-by: Wei Chen Diagnosed-by: Paolo Abeni Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/unix/diag.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/net/unix/diag.c b/net/unix/diag.c index 9ff64f9df1f3..951b33fa8f5c 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -113,14 +113,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) 
return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql); } -static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb) +static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb, + struct user_namespace *user_ns) { - uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk)); + uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk)); return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid); } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 portid, u32 seq, u32 flags, int sk_ino) + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct unix_diag_msg *rep; @@ -166,7 +168,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_UID) && - sk_diag_dump_uid(sk, skb)) + sk_diag_dump_uid(sk, skb, user_ns)) goto out_nlmsg_trim; nlmsg_end(skb, nlh); @@ -178,7 +180,8 @@ out_nlmsg_trim: } static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 portid, u32 seq, u32 flags) + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags) { int sk_ino; @@ -189,7 +192,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (!sk_ino) return 0; - return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino); + return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino); } static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -217,7 +220,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) goto next; - if (sk_diag_dump(sk, skb, req, + if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) @@ -285,7 +288,8 @@ again: if (!rep) goto out; - err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid, + err = sk_diag_fill(sk, 
rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { nlmsg_free(rep); From 80c69b31aa5b4586adaf29f4d1694111d3e26951 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Wed, 30 Nov 2022 00:21:46 -0800 Subject: [PATCH 1259/1477] vmxnet3: correctly report encapsulated LRO packet [ Upstream commit 40b8c2a1af03ba3e8da55a4490d646bfa845e71a ] Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") added support for encapsulation offload. However, the patch did not correctly report the encapsulated packet which is LRO'ed by the hypervisor. This patch fixes this issue by using the correct callback for the LRO'ed encapsulated packet. Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/vmxnet3/vmxnet3_drv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 6678a734cc4d..43a4bcdd92c1 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1356,6 +1356,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, }; u32 num_pkts = 0; bool skip_page_frags = false; + bool encap_lro = false; struct Vmxnet3_RxCompDesc *rcd; struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; u16 segCnt = 0, mss = 0; @@ -1496,13 +1497,18 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, if (VMXNET3_VERSION_GE_2(adapter) && rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) { struct Vmxnet3_RxCompDescExt *rcdlro; + union Vmxnet3_GenericDesc *gdesc; + rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd; + gdesc = (union Vmxnet3_GenericDesc *)rcd; segCnt = rcdlro->segCnt; WARN_ON_ONCE(segCnt == 0); mss = rcdlro->mss; if (unlikely(segCnt <= 1)) segCnt = 0; + encap_lro = (le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)); } else { segCnt = 0; }
@@ -1570,7 +1576,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, vmxnet3_rx_csum(adapter, skb, (union Vmxnet3_GenericDesc *)rcd); skb->protocol = eth_type_trans(skb, adapter->netdev); - if (!rcd->tcp || + if ((!rcd->tcp && !encap_lro) || !(adapter->netdev->features & NETIF_F_LRO)) goto not_lro; @@ -1579,7 +1585,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, SKB_GSO_TCPV4 : SKB_GSO_TCPV6; skb_shinfo(skb)->gso_size = mss; skb_shinfo(skb)->gso_segs = segCnt; - } else if (segCnt != 0 || skb->len > mtu) { + } else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) { u32 hlen; hlen = vmxnet3_get_hdr_len(adapter, skb, @@ -1608,6 +1614,7 @@ not_lro: napi_gro_receive(&rq->napi, skb); ctx->skb = NULL; + encap_lro = false; num_pkts++; } From 1717354d77f82c733ff5e828b897f3b1a7053d6d Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Wed, 9 Nov 2022 17:37:26 +0800 Subject: [PATCH 1260/1477] Bluetooth: 6LoWPAN: add missing hci_dev_put() in get_l2cap_conn() [ Upstream commit 747da1308bdd5021409974f9180f0d8ece53d142 ] hci_get_route() takes a reference; we should use hci_dev_put() to release it when it is no longer needed.
Fixes: 6b8d4a6a0314 ("Bluetooth: 6LoWPAN: Use connected oriented channel instead of fixed one") Signed-off-by: Wang ShaoBo Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/6lowpan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index cff4944d5b66..7601ce9143c1 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -1010,6 +1010,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); hci_dev_unlock(hdev); + hci_dev_put(hdev); if (!hcon) return -ENOENT; From e7b950458156d410509a08c41930b75e72985938 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 29 Nov 2022 17:25:56 +0800 Subject: [PATCH 1261/1477] Bluetooth: Fix not cleanup led when bt_init fails [ Upstream commit 2f3957c7eb4e07df944169a3e50a4d6790e1c744 ] bt_init() calls bt_leds_init() to register led, but if it fails later, bt_leds_cleanup() is not called to unregister it. 
This can cause panic if the argument "bluetooth-power" in text is freed and then another led_trigger_register() tries to access it: BUG: unable to handle page fault for address: ffffffffc06d3bc0 RIP: 0010:strcmp+0xc/0x30 Call Trace: led_trigger_register+0x10d/0x4f0 led_trigger_register_simple+0x7d/0x100 bt_init+0x39/0xf7 [bluetooth] do_one_initcall+0xd0/0x4e0 Fixes: e64c97b53bc6 ("Bluetooth: Add combined LED trigger for controller power") Signed-off-by: Chen Zhongjin Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/af_bluetooth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 4ef6a54403aa..2f87f57e7a4f 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -736,7 +736,7 @@ static int __init bt_init(void) err = bt_sysfs_init(); if (err < 0) - return err; + goto cleanup_led; err = sock_register(&bt_sock_family_ops); if (err) @@ -772,6 +772,8 @@ unregister_socket: sock_unregister(PF_BLUETOOTH); cleanup_sysfs: bt_sysfs_cleanup(); +cleanup_led: + bt_leds_cleanup(); return err; } From bccda3ad0748958473eb730e29c0487007a98832 Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Thu, 1 Dec 2022 17:00:30 +0300 Subject: [PATCH 1262/1477] net: dsa: ksz: Check return value [ Upstream commit 3d8fdcbf1f42e2bb9ae8b8c0b6f202278c788a22 ] Return NULL if we got unexpected value from skb_trim_rcsum() in ksz_common_rcv() Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Fixes: bafe9ba7d908 ("net: dsa: ksz: Factor out common tag code") Signed-off-by: Artem Chernyshev Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221201140032.26746-1-artem.chernyshev@red-soft.ru Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/dsa/tag_ksz.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 4820dbcedfa2..230ddf45dff0 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -22,7 +22,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; - pskb_trim_rcsum(skb, skb->len - len); + if (pskb_trim_rcsum(skb, skb->len - len)) + return NULL; skb->offload_fwd_mark = true; From 4c693330cec2ef89cb5f58d81e10d5d08f277526 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 1 Dec 2022 16:22:46 +0800 Subject: [PATCH 1263/1477] selftests: rtnetlink: correct xfrm policy rule in kci_test_ipsec_offload [ Upstream commit 85a0506c073332a3057f5a9635fa0d4db5a8e03b ] When testing in kci_test_ipsec_offload, srcip is configured as $dstip, it should add xfrm policy rule in instead of out. The test result of this patch is as follows: PASS: ipsec_offload Fixes: 2766a11161cc ("selftests: rtnetlink: add ipsec offload API test") Signed-off-by: Zhengchao Shao Acked-by: Hangbin Liu Link: https://lore.kernel.org/r/20221201082246.14131-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/rtnetlink.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index c9ce3dfa42ee..c3a905923ef2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -782,7 +782,7 @@ kci_test_ipsec_offload() tmpl proto esp src $srcip dst $dstip spi 9 \ mode transport reqid 42 check_err $? 
- ip x p add dir out src $dstip/24 dst $srcip/24 \ + ip x p add dir in src $dstip/24 dst $srcip/24 \ tmpl proto esp src $dstip dst $srcip spi 9 \ mode transport reqid 42 check_err $? From 42c319635c0cf7eb36eccac6cda76532f47b61a3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 30 Nov 2022 09:17:05 +0000 Subject: [PATCH 1264/1477] mac802154: fix missing INIT_LIST_HEAD in ieee802154_if_add() [ Upstream commit b3d72d3135d2ef68296c1ee174436efd65386f04 ] Kernel fault injection test reports null-ptr-deref as follows: BUG: kernel NULL pointer dereference, address: 0000000000000008 RIP: 0010:cfg802154_netdev_notifier_call+0x120/0x310 include/linux/list.h:114 Call Trace: raw_notifier_call_chain+0x6d/0xa0 kernel/notifier.c:87 call_netdevice_notifiers_info+0x6e/0xc0 net/core/dev.c:1944 unregister_netdevice_many_notify+0x60d/0xcb0 net/core/dev.c:1982 unregister_netdevice_queue+0x154/0x1a0 net/core/dev.c:10879 register_netdevice+0x9a8/0xb90 net/core/dev.c:10083 ieee802154_if_add+0x6ed/0x7e0 net/mac802154/iface.c:659 ieee802154_register_hw+0x29c/0x330 net/mac802154/main.c:229 mcr20a_probe+0xaaa/0xcb1 drivers/net/ieee802154/mcr20a.c:1316 ieee802154_if_add() allocates wpan_dev as netdev's private data, but not init the list in struct wpan_dev. cfg802154_netdev_notifier_call() manage the list when device register/unregister, and may lead to null-ptr-deref. Use INIT_LIST_HEAD() on it to initialize it correctly. 
Fixes: fcf39e6e88e9 ("ieee802154: add wpan_dev_list") Signed-off-by: Wei Yongjun Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20221130091705.1831140-1-weiyongjun@huaweicloud.com Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin --- net/mac802154/iface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 1cf5ac09edcb..a08240fe68a7 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -661,6 +661,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, sdata->dev = ndev; sdata->wpan_dev.wpan_phy = local->hw.phy; sdata->local = local; + INIT_LIST_HEAD(&sdata->wpan_dev.list); /* setup type-dependent data */ ret = ieee802154_setup_sdata(sdata, type); From 51ba1820e736f3c9bc32807179ea8d8fa08964c9 Mon Sep 17 00:00:00 2001 From: Valentina Goncharenko Date: Thu, 1 Dec 2022 20:34:07 +0300 Subject: [PATCH 1265/1477] net: encx24j600: Add parentheses to fix precedence [ Upstream commit 167b3f2dcc62c271f3555b33df17e361bb1fa0ee ] In functions regmap_encx24j600_phy_reg_read() and regmap_encx24j600_phy_reg_write() in the conditions of the waiting cycles for filling the variable 'ret' it is necessary to add parentheses to prevent wrong assignment due to logical operations precedence. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d70e53262f5c ("net: Microchip encx24j600 driver") Signed-off-by: Valentina Goncharenko Reviewed-by: Pavan Chebbi Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/encx24j600-regmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 81a8ccca7e5e..2e337c7a5773 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c @@ -359,7 +359,7 @@ static int regmap_encx24j600_phy_reg_read(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) != 0) && (mistat & BUSY)) cpu_relax(); @@ -397,7 +397,7 @@ static int regmap_encx24j600_phy_reg_write(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) != 0) && (mistat & BUSY)) cpu_relax(); From 3d3b30718ae3d651a786eba4ea1ba52dc2941397 Mon Sep 17 00:00:00 2001 From: Valentina Goncharenko Date: Thu, 1 Dec 2022 20:34:08 +0300 Subject: [PATCH 1266/1477] net: encx24j600: Fix invalid logic in reading of MISTAT register [ Upstream commit 25f427ac7b8d89b0259f86c0c6407b329df742b2 ] A loop for reading MISTAT register continues while regmap_read() fails and (mistat & BUSY), but if regmap_read() fails a value of mistat is undefined. The patch proposes to check for BUSY flag only when regmap_read() succeed. Compile test only. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d70e53262f5c ("net: Microchip encx24j600 driver") Signed-off-by: Valentina Goncharenko Reviewed-by: Pavan Chebbi Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/encx24j600-regmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 2e337c7a5773..5693784eec5b 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c @@ -359,7 +359,7 @@ static int regmap_encx24j600_phy_reg_read(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && (mistat & BUSY)) cpu_relax(); @@ -397,7 +397,7 @@ static int regmap_encx24j600_phy_reg_write(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && (mistat & BUSY)) cpu_relax(); From e6860c889f4ad50b6ab696f5ea154295d72cf27a Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 2 Dec 2022 08:52:48 +0000 Subject: [PATCH 1267/1477] xen-netfront: Fix NULL sring after live migration [ Upstream commit d50b7914fae04d840ce36491d22133070b18cca9 ] A NAPI is setup for each network sring to poll data to kernel The sring with source host is destroyed before live migration and new sring with target host is setup after live migration. The NAPI for the old sring is not deleted until setup new sring with target host after migration. With busy_poll/busy_read enabled, the NAPI can be polled before got deleted when resume VM. 
BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: xennet_poll+0xae/0xd20 PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI Call Trace: finish_task_switch+0x71/0x230 timerqueue_del+0x1d/0x40 hrtimer_try_to_cancel+0xb5/0x110 xennet_alloc_rx_buffers+0x2a0/0x2a0 napi_busy_loop+0xdb/0x270 sock_poll+0x87/0x90 do_sys_poll+0x26f/0x580 tracing_map_insert+0x1d4/0x2f0 event_hist_trigger+0x14a/0x260 finish_task_switch+0x71/0x230 __schedule+0x256/0x890 recalc_sigpending+0x1b/0x50 xen_sched_clock+0x15/0x20 __rb_reserve_next+0x12d/0x140 ring_buffer_lock_reserve+0x123/0x3d0 event_triggers_call+0x87/0xb0 trace_event_buffer_commit+0x1c4/0x210 xen_clocksource_get_cycles+0x15/0x20 ktime_get_ts64+0x51/0xf0 SyS_ppoll+0x160/0x1a0 SyS_ppoll+0x160/0x1a0 do_syscall_64+0x73/0x130 entry_SYSCALL_64_after_hwframe+0x41/0xa6 ... RIP: xennet_poll+0xae/0xd20 RSP: ffffb4f041933900 CR2: 0000000000000008 ---[ end trace f8601785b354351c ]--- The xen frontend should remove the NAPIs for the old srings before live migration, as the bond srings are destroyed. There is a tiny window between the srings being set to NULL and the NAPIs being disabled; this is safe as the NAPI threads are still frozen at that time. Signed-off-by: Lin Liu Fixes: 4ec2411980d0 ([NET]: Do not check netif_running() and carrier state in ->poll()) Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- drivers/net/xen-netfront.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 569f3c8e7b75..3d149890fa36 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1868,6 +1868,12 @@ static int netfront_resume(struct xenbus_device *dev) netif_tx_unlock_bh(info->netdev); xennet_disconnect_backend(info); + + rtnl_lock(); + if (info->queues) + xennet_destroy_queues(info); + rtnl_unlock(); + return 0; } From 146ebee8fcdb349d7ec0e49915e6cdafb92544ae Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 2 Dec 2022 12:58:26 +0300 Subject: [PATCH 1268/1477] net: mvneta: Prevent out of bounds read in mvneta_config_rss() [ Upstream commit e8b4fc13900b8e8be48debffd0dfd391772501f7 ] The pp->indir[0] value comes from the user. It is passed to: if (cpu_online(pp->rxq_def)) inside the mvneta_percpu_elect() function. It needs bounds checking to ensure that it is not beyond the end of the cpu bitmap. Fixes: cad5d847a093 ("net: mvneta: Fix the CPU choice in mvneta_percpu_elect") Signed-off-by: Dan Carpenter Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvneta.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 74e266c0b8e1..6bfa0ac27be3 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4767,6 +4767,9 @@ static int mvneta_config_rss(struct mvneta_port *pp) napi_disable(&pp->napi); } + if (pp->indir[0] >= nr_cpu_ids) + return -EINVAL; + pp->rxq_def = pp->indir[0]; /* Update unicast mapping */ From 215f3ac53b18515aea136aa6934ff98c271ed322 Mon Sep 17 00:00:00 2001 From: Michal Jaron Date: Mon, 24 Oct 2022 10:19:42 +0200 Subject: [PATCH 1269/1477] i40e: Fix not setting default xps_cpus after reset [ Upstream commit 82e0572b23029b380464fa9fdc125db9c1506d0a ] During tx rings configuration default XPS queue config is set and __I40E_TX_XPS_INIT_DONE is locked. __I40E_TX_XPS_INIT_DONE state is cleared and set again with default mapping only during queues build, it means after first setup or reset with queues rebuild. (i.e. ethtool -L combined ) After other resets (i.e. ethtool -t ) XPS_INIT_DONE is not cleared and those default maps cannot be set again. It results in cleared xps_cpus mapping until queues are not rebuild or mapping is not set by user. Add clearing __I40E_TX_XPS_INIT_DONE state during reset to let the driver set xps_cpus to defaults again after it was cleared. 
Fixes: 6f853d4f8e93 ("i40e: allow XPS with QoS enabled") Signed-off-by: Michal Jaron Signed-off-by: Kamil Maziarz Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d7ddf9239e51..2c60d2a93330 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10065,6 +10065,21 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi) return 0; } +/** + * i40e_clean_xps_state - clean xps state for every tx_ring + * @vsi: ptr to the VSI + **/ +static void i40e_clean_xps_state(struct i40e_vsi *vsi) +{ + int i; + + if (vsi->tx_rings) + for (i = 0; i < vsi->num_queue_pairs; i++) + if (vsi->tx_rings[i]) + clear_bit(__I40E_TX_XPS_INIT_DONE, + vsi->tx_rings[i]->state); +} + /** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure @@ -10096,8 +10111,10 @@ static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired) rtnl_unlock(); for (v = 0; v < pf->num_alloc_vsi; v++) { - if (pf->vsi[v]) + if (pf->vsi[v]) { + i40e_clean_xps_state(pf->vsi[v]); pf->vsi[v]->seid = 0; + } } i40e_shutdown_adminq(&pf->hw); From 8329b65e34ef01dfdbd0ba28fd55fd46d0241042 Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Mon, 31 Oct 2022 13:00:28 +0100 Subject: [PATCH 1270/1477] i40e: Fix for VF MAC address 0 [ Upstream commit 08501970472077ed5de346ad89943a37d1692e9b ] After spawning max VFs on a PF, some VFs were not getting resources and their MAC addresses were 0. This was caused by PF sleeping before flushing HW registers which caused VIRTCHNL_VFR_VFACTIVE to not be set in time for VF. Fix by adding a sleep after hw flush. 
Fixes: e4b433f4a741 ("i40e: reset all VFs in parallel when rebuilding PF") Signed-off-by: Sylwester Dziedziuch Signed-off-by: Jan Sokolowski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 381b28a08746..bb2a79b70c3a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1525,6 +1525,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) i40e_cleanup_reset_vf(vf); i40e_flush(hw); + usleep_range(20000, 40000); clear_bit(I40E_VF_STATE_RESETTING, &vf->vf_states); return true; @@ -1648,6 +1649,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) } i40e_flush(hw); + usleep_range(20000, 40000); clear_bit(__I40E_VF_DISABLE, pf->state); return true; From a2506b19d7a33c0dc2686368876f1d563430421f Mon Sep 17 00:00:00 2001 From: Przemyslaw Patynowski Date: Tue, 15 Nov 2022 09:49:25 +0100 Subject: [PATCH 1271/1477] i40e: Disallow ip4 and ip6 l4_4_bytes [ Upstream commit d64aaf3f7869f915fd120763d75f11d6b116424d ] Return -EOPNOTSUPP, when user requests l4_4_bytes for raw IP4 or IP6 flow director filters. Flow director does not support filtering on l4 bytes for PCTYPEs used by IP4 and IP6 filters. Without this patch, user could create filters with l4_4_bytes fields, which did not do any filtering on L4, but only on L3 fields. 
Fixes: 36777d9fa24c ("i40e: check current configured input set when adding ntuple filters") Signed-off-by: Przemyslaw Patynowski Signed-off-by: Kamil Maziarz Reviewed-by: Jacob Keller Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 144c4824b5e8..520929f4d535 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -4234,11 +4234,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, return -EOPNOTSUPP; /* First 4 bytes of L4 header */ - if (usr_ip4_spec->l4_4_bytes == htonl(0xFFFFFFFF)) - new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK; - else if (!usr_ip4_spec->l4_4_bytes) - new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK); - else + if (usr_ip4_spec->l4_4_bytes) return -EOPNOTSUPP; /* Filtering on Type of Service is not supported. */ From 27eb2d7a1b9987b6d0429b7716b1ff3b82c4ffc9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Dec 2022 13:44:14 -0800 Subject: [PATCH 1272/1477] NFC: nci: Bounds check struct nfc_target arrays [ Upstream commit e329e71013c9b5a4535b099208493c7826ee4a64 ] While running under CONFIG_FORTIFY_SOURCE=y, syzkaller reported: memcpy: detected field-spanning write (size 129) of single field "target->sensf_res" at net/nfc/nci/ntf.c:260 (size 18) This appears to be a legitimate lack of bounds checking in nci_add_new_protocol(). Add the missing checks. 
Reported-by: syzbot+210e196cef4711b65139@syzkaller.appspotmail.com Link: https://lore.kernel.org/lkml/0000000000001c590f05ee7b3ff4@google.com Fixes: 019c4fbaa790 ("NFC: Add NCI multiple targets support") Signed-off-by: Kees Cook Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221202214410.never.693-kees@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/nfc/nci/ntf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 33e1170817f0..f8b20cddd5c9 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -218,6 +218,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, target->sens_res = nfca_poll->sens_res; target->sel_res = nfca_poll->sel_res; target->nfcid1_len = nfca_poll->nfcid1_len; + if (target->nfcid1_len > ARRAY_SIZE(target->nfcid1)) + return -EPROTO; if (target->nfcid1_len > 0) { memcpy(target->nfcid1, nfca_poll->nfcid1, target->nfcid1_len); @@ -226,6 +228,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, nfcb_poll = (struct rf_tech_specific_params_nfcb_poll *)params; target->sensb_res_len = nfcb_poll->sensb_res_len; + if (target->sensb_res_len > ARRAY_SIZE(target->sensb_res)) + return -EPROTO; if (target->sensb_res_len > 0) { memcpy(target->sensb_res, nfcb_poll->sensb_res, target->sensb_res_len); @@ -234,6 +238,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, nfcf_poll = (struct rf_tech_specific_params_nfcf_poll *)params; target->sensf_res_len = nfcf_poll->sensf_res_len; + if (target->sensf_res_len > ARRAY_SIZE(target->sensf_res)) + return -EPROTO; if (target->sensf_res_len > 0) { memcpy(target->sensf_res, nfcf_poll->sensf_res, target->sensf_res_len); From ca26f45083d6e90c5ad7867b23a413347d3aeda5 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Thu, 1 Dec 2022 13:52:34 +0100 Subject: [PATCH 1273/1477] nvme initialize core quirks before calling nvme_init_subsystem [ Upstream commit 6f2d71524bcfdeb1fcbd22a4a92a5b7b161ab224 ] A device might 
have a core quirk for NVME_QUIRK_IGNORE_DEV_SUBNQN (such as Samsung X5) but it would still give a: "missing or invalid SUBNQN field" warning as core quirks are filled after calling nvme_init_subnqn. Fill ctrl->quirks from struct core_quirks before calling nvme_init_subsystem to fix this. Tested on a Samsung X5. Fixes: ab9e00cc72fa ("nvme: track subsystems") Signed-off-by: Pankaj Raghav Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d9c78fe85cb3..e162f1dfbafe 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3092,10 +3092,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (!ctrl->identified) { int i; - ret = nvme_init_subsystem(ctrl, id); - if (ret) - goto out_free; - /* * Check for quirks. Quirk can depend on firmware version, * so, in principle, the set of quirks present can change @@ -3108,6 +3104,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (quirk_matches(id, &core_quirks[i])) ctrl->quirks |= core_quirks[i].quirks; } + + ret = nvme_init_subsystem(ctrl, id); + if (ret) + goto out_free; } memcpy(ctrl->subsys->firmware_rev, id->fr, sizeof(ctrl->subsys->firmware_rev)); From 696e34d54ca125e8d97e74401a3323a848b468b5 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sat, 3 Dec 2022 00:17:39 +0800 Subject: [PATCH 1274/1477] net: stmmac: fix "snps,axi-config" node property parsing [ Upstream commit 61d4f140943c47c1386ed89f7260e00418dfad9d ] In dt-binding snps,dwmac.yaml, some properties under "snps,axi-config" node are named without "axi_" prefix, but the driver expects the prefix. Since the dt-binding has been there for a long time, we'd better make driver match the binding for compatibility. 
Fixes: afea03656add ("stmmac: rework DMA bus setting and introduce new platform AXI structure") Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20221202161739.2203-1-jszhang@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index f70d8d1ce329..1ed74cfb61fc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -108,10 +108,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en"); axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm"); - axi->axi_kbbe = of_property_read_bool(np, "snps,axi_kbbe"); - axi->axi_fb = of_property_read_bool(np, "snps,axi_fb"); - axi->axi_mb = of_property_read_bool(np, "snps,axi_mb"); - axi->axi_rb = of_property_read_bool(np, "snps,axi_rb"); + axi->axi_kbbe = of_property_read_bool(np, "snps,kbbe"); + axi->axi_fb = of_property_read_bool(np, "snps,fb"); + axi->axi_mb = of_property_read_bool(np, "snps,mb"); + axi->axi_rb = of_property_read_bool(np, "snps,rb"); if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt)) axi->axi_wr_osr_lmt = 1; From a5cfbc199536d853abd45a9105fe097202c4f0ca Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 3 Dec 2022 11:28:58 +0800 Subject: [PATCH 1275/1477] ip_gre: do not report erspan version on GRE interface [ Upstream commit ee496694b9eea651ae1aa4c4667d886cdf74aa3b ] Although the type I ERSPAN is based on the barebones IP + GRE encapsulation with no extra ERSPAN header, reporting the erspan version on a GRE interface looks unreasonable. Fix this by separating the erspan and gre fill info. IPv6 GRE does not have this info as IPv6 only supports erspan version 1 and 2.
Reported-by: Jianlin Shi Fixes: f989d546a2d5 ("erspan: Add type I version 0 support.") Signed-off-by: Hangbin Liu Acked-by: William Tu Link: https://lore.kernel.org/r/20221203032858.3130339-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/ip_gre.c | 48 ++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 6ab5c50aa7a8..65ead8a74933 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1493,24 +1493,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; __be16 o_flags = p->o_flags; - if (t->erspan_ver <= 2) { - if (t->erspan_ver != 0 && !t->collect_md) - o_flags |= TUNNEL_KEY; - - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) - goto nla_put_failure; - - if (t->erspan_ver == 1) { - if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) - goto nla_put_failure; - } else if (t->erspan_ver == 2) { - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) - goto nla_put_failure; - if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) - goto nla_put_failure; - } - } - if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || @@ -1551,6 +1533,34 @@ nla_put_failure: return -EMSGSIZE; } +static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + + if (t->erspan_ver <= 2) { + if (t->erspan_ver != 0 && !t->collect_md) + t->parms.o_flags |= TUNNEL_KEY; + + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) + goto nla_put_failure; + + if (t->erspan_ver == 1) { + if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) + goto nla_put_failure; + } else if (t->erspan_ver == 2) { + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) + goto nla_put_failure; + 
} + } + + return ipgre_fill_info(skb, dev); + +nla_put_failure: + return -EMSGSIZE; +} + static void erspan_setup(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); @@ -1629,7 +1639,7 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = { .changelink = erspan_changelink, .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, - .fill_info = ipgre_fill_info, + .fill_info = erspan_fill_info, .get_link_net = ip_tunnel_get_link_net, }; From 8d1aed7a117a114707dd04a63fddc7fdcce18132 Mon Sep 17 00:00:00 2001 From: Yongqiang Liu Date: Sat, 3 Dec 2022 09:41:25 +0000 Subject: [PATCH 1276/1477] net: thunderx: Fix missing destroy_workqueue of nicvf_rx_mode_wq [ Upstream commit 42330a32933fb42180c52022804dcf09f47a2f99 ] The nicvf_probe() won't destroy workqueue when register_netdev() failed. Add destroy_workqueue err handle case to fix this issue. Fixes: 2ecbe4f4a027 ("net: thunderx: replace global nicvf_rx_mode_wq work queue for all VFs to private for each of them.") Signed-off-by: Yongqiang Liu Reviewed-by: Pavan Chebbi Link: https://lore.kernel.org/r/20221203094125.602812-1-liuyongqiang13@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index c00f1a7ffc15..488da767cfdf 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -2258,7 +2258,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = register_netdev(netdev); if (err) { dev_err(dev, "Failed to register netdevice\n"); - goto err_unregister_interrupts; + goto err_destroy_workqueue; } nic->msg_enable = debug; @@ -2267,6 +2267,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; +err_destroy_workqueue: + 
destroy_workqueue(nic->nicvf_rx_mode_wq); err_unregister_interrupts: nicvf_unregister_interrupts(nic); err_free_netdev: From 296a50aa8b2982117520713edc1375777a9f8506 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sat, 3 Dec 2022 17:42:39 +0800 Subject: [PATCH 1277/1477] net: hisilicon: Fix potential use-after-free in hisi_femac_rx() [ Upstream commit 4640177049549de1a43e9bc49265f0cdfce08cfd ] The skb is delivered to napi_gro_receive() which may free it, after calling this, dereferencing skb may trigger use-after-free. Fixes: 542ae60af24f ("net: hisilicon: Add Fast Ethernet MAC driver") Signed-off-by: Liu Jian Link: https://lore.kernel.org/r/20221203094240.1240211-1-liujian56@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hisi_femac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index 57c3bc4f7089..c16dfd869363 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -283,7 +283,7 @@ static int hisi_femac_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); napi_gro_receive(&priv->napi, skb); dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += len; next: pos = (pos + 1) % rxq->num; if (rx_pkts_num >= limit) From 93aaa4bb72e388f6a4887541fd3d18b84f1b5ddc Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sat, 3 Dec 2022 17:42:40 +0800 Subject: [PATCH 1278/1477] net: hisilicon: Fix potential use-after-free in hix5hd2_rx() [ Upstream commit 433c07a13f59856e4585e89e86b7d4cc59348fab ] The skb is delivered to napi_gro_receive() which may free it, after calling this, dereferencing skb may trigger use-after-free. 
Fixes: 57c5bc9ad7d7 ("net: hisilicon: add hix5hd2 mac driver") Signed-off-by: Liu Jian Link: https://lore.kernel.org/r/20221203094240.1240211-2-liujian56@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 8b2bf85039f1..43f3146caf07 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -550,7 +550,7 @@ static int hix5hd2_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); napi_gro_receive(&priv->napi, skb); dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += len; next: pos = dma_ring_incr(pos, RX_DESC_NUM); } From 36e248269a16bd872631b76d4f0ec710f84e140d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 3 Dec 2022 17:46:35 +0800 Subject: [PATCH 1279/1477] tipc: Fix potential OOB in tipc_link_proto_rcv() [ Upstream commit 743117a997bbd4840e827295c07e59bcd7f7caa3 ] Fix the potential risk of OOB if skb_linearize() fails in tipc_link_proto_rcv(). 
Fixes: 5cbb28a4bf65 ("tipc: linearize arriving NAME_DISTR and LINK_PROTO buffers") Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20221203094635.29024-1-yuehaibing@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/tipc/link.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 064fdb8e50e1..c1e56d1f21b3 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2188,7 +2188,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); - skb_linearize(skb); + if (skb_linearize(skb)) + goto exit; + hdr = buf_msg(skb); data = msg_data(hdr); From 5211e5ff9ddc67e2cbd5af78e09b8e7d85ca95f2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 4 Dec 2022 09:50:44 +0200 Subject: [PATCH 1280/1477] ipv4: Fix incorrect route flushing when source address is deleted [ Upstream commit f96a3d74554df537b6db5c99c27c80e7afadc8d1 ] Cited commit added the table ID to the FIB info structure, but did not prevent structures with different table IDs from being consolidated. This can lead to routes being flushed from a VRF when an address is deleted from a different VRF. Fix by taking the table ID into account when looking for a matching FIB info. This is already done for FIB info structures backed by a nexthop object in fib_find_info_nh(). 
Add test cases that fail before the fix: # ./fib_tests.sh -t ipv4_del_addr IPv4 delete address route tests Regular FIB info TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Identical FIB info with different table ID TEST: Route removed from VRF when source address deleted [FAIL] TEST: Route in default VRF not removed [ OK ] RTNETLINK answers: File exists TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [FAIL] Tests passed: 6 Tests failed: 2 And pass after: # ./fib_tests.sh -t ipv4_del_addr IPv4 delete address route tests Regular FIB info TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Identical FIB info with different table ID TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Tests passed: 8 Tests failed: 0 Fixes: 5a56a0b3a45d ("net: Don't delete routes in different VRFs") Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/fib_semantics.c | 1 + tools/testing/selftests/net/fib_tests.sh | 27 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 52ec0c43e6b8..ab9fcc6231b8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -423,6 +423,7 @@ static struct fib_info *fib_find_info(struct fib_info *nfi) nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == 
fi->fib_priority && nfi->fib_type == fi->fib_type && + nfi->fib_tb_id == fi->fib_tb_id && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index a7f53c2a9580..a7b40dc56cae 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1622,13 +1622,19 @@ ipv4_del_addr_test() $IP addr add dev dummy1 172.16.104.1/24 $IP addr add dev dummy1 172.16.104.11/24 + $IP addr add dev dummy1 172.16.104.12/24 $IP addr add dev dummy2 172.16.104.1/24 $IP addr add dev dummy2 172.16.104.11/24 + $IP addr add dev dummy2 172.16.104.12/24 $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add 172.16.106.0/24 dev lo src 172.16.104.12 $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 set +e # removing address from device in vrf should only remove route from vrf table + echo " Regular FIB info" + $IP addr del dev dummy2 172.16.104.11/24 $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 1 "Route removed from VRF when source address deleted" @@ -1646,6 +1652,27 @@ ipv4_del_addr_test() $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 0 "Route in VRF is not removed by address delete" + # removing address from device in vrf should only remove route from vrf + # table even when the associated fib info only differs in table ID + echo " Identical FIB info with different table ID" + + $IP addr del dev dummy2 172.16.104.12/24 + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed from VRF when source address deleted" + + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 
0 "Route in default VRF not removed" + + $IP addr add dev dummy2 172.16.104.12/24 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 + + $IP addr del dev dummy1 172.16.104.12/24 + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 0 "Route in VRF is not removed by address delete" + $IP li del dummy1 $IP li del dummy2 cleanup From 8e3f9ac00956442ecfbdd4e6c5c731b8ab6f77a0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 4 Dec 2022 09:50:45 +0200 Subject: [PATCH 1281/1477] ipv4: Fix incorrect route flushing when table ID 0 is used [ Upstream commit c0d999348e01df03e0a7f550351f3907fabbf611 ] Cited commit added the table ID to the FIB info structure, but did not properly initialize it when table ID 0 is used. This can lead to a route in the default VRF with a preferred source address not being flushed when the address is deleted. Consider the following example: # ip address add dev dummy1 192.0.2.1/28 # ip address add dev dummy1 192.0.2.17/28 # ip route add 198.51.100.0/24 via 192.0.2.2 src 192.0.2.17 metric 100 # ip route add table 0 198.51.100.0/24 via 192.0.2.2 src 192.0.2.17 metric 200 # ip route show 198.51.100.0/24 198.51.100.0/24 via 192.0.2.2 dev dummy1 src 192.0.2.17 metric 100 198.51.100.0/24 via 192.0.2.2 dev dummy1 src 192.0.2.17 metric 200 Both routes are installed in the default VRF, but they are using two different FIB info structures. One with a metric of 100 and table ID of 254 (main) and one with a metric of 200 and table ID of 0. Therefore, when the preferred source address is deleted from the default VRF, the second route is not flushed: # ip address del dev dummy1 192.0.2.17/28 # ip route show 198.51.100.0/24 198.51.100.0/24 via 192.0.2.2 dev dummy1 src 192.0.2.17 metric 200 Fix by storing a table ID of 254 instead of 0 in the route configuration structure. 
Add a test case that fails before the fix: # ./fib_tests.sh -t ipv4_del_addr IPv4 delete address route tests Regular FIB info TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Identical FIB info with different table ID TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Table ID 0 TEST: Route removed in default VRF when source address deleted [FAIL] Tests passed: 8 Tests failed: 1 And passes after: # ./fib_tests.sh -t ipv4_del_addr IPv4 delete address route tests Regular FIB info TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Identical FIB info with different table ID TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Table ID 0 TEST: Route removed in default VRF when source address deleted [ OK ] Tests passed: 9 Tests failed: 0 Fixes: 5a56a0b3a45d ("net: Don't delete routes in different VRFs") Reported-by: Donald Sharp Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/fib_frontend.c | 3 +++ tools/testing/selftests/net/fib_tests.sh | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index af8a4255cf1b..5f786ef662ea 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -830,6 
+830,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, return -EINVAL; } + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + return 0; errout: return err; diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index a7b40dc56cae..0f3bf90e04d3 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1623,11 +1623,13 @@ ipv4_del_addr_test() $IP addr add dev dummy1 172.16.104.1/24 $IP addr add dev dummy1 172.16.104.11/24 $IP addr add dev dummy1 172.16.104.12/24 + $IP addr add dev dummy1 172.16.104.13/24 $IP addr add dev dummy2 172.16.104.1/24 $IP addr add dev dummy2 172.16.104.11/24 $IP addr add dev dummy2 172.16.104.12/24 $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 $IP route add 172.16.106.0/24 dev lo src 172.16.104.12 + $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13 $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 set +e @@ -1673,6 +1675,14 @@ ipv4_del_addr_test() $IP ro ls vrf red | grep -q 172.16.106.0/24 log_test $? 0 "Route in VRF is not removed by address delete" + # removing address from device in default vrf should remove route from + # the default vrf even when route was inserted with a table ID of 0. + echo " Table ID 0" + + $IP addr del dev dummy1 172.16.104.13/24 + $IP ro ls | grep -q 172.16.107.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + $IP li del dummy1 $IP li del dummy2 cleanup From 4be43e46c3f945fc7dd9e23c73a7a66927a3b814 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 5 Dec 2022 09:21:32 +0800 Subject: [PATCH 1282/1477] net: dsa: sja1105: fix memory leak in sja1105_setup_devlink_regions() [ Upstream commit 78a9ea43fc1a7c06a420b132d2d47cbf4344a5df ] When dsa_devlink_region_create failed in sja1105_setup_devlink_regions(), priv->regions is not released. 
Fixes: bf425b82059e ("net: dsa: sja1105: expose static config as devlink region") Signed-off-by: Zhengchao Shao Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221205012132.2110979-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/sja1105/sja1105_devlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index ec2ac91abcfa..8e3d185c8460 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -95,6 +95,8 @@ static int sja1105_setup_devlink_regions(struct dsa_switch *ds) if (IS_ERR(region)) { while (--i >= 0) dsa_devlink_region_destroy(priv->regions[i]); + + kfree(priv->regions); return PTR_ERR(region); } From cc668fddde4262f608baca2c9d85b9cf333e41c3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 3 Dec 2022 18:37:21 -0500 Subject: [PATCH 1283/1477] tipc: call tipc_lxc_xmit without holding node_read_lock [ Upstream commit 88956177db179e4eba7cd590971961857d1565b8 ] When sending packets between nodes in netns, it calls tipc_lxc_xmit() for peer node to receive the packets where tipc_sk_mcast_rcv()/tipc_sk_rcv() might be called, and it's pretty much like in tipc_rcv(). Currently the local 'node rw lock' is held during calling tipc_lxc_xmit() to protect the peer_net not being freed by another thread. However, when receiving these packets, tipc_node_add_conn() might be called where the peer 'node rw lock' is acquired. 
Then a deadlock warning is triggered by the lockdep detector, although it is not a real deadlock: WARNING: possible recursive locking detected -------------------------------------------- conn_server/1086 is trying to acquire lock: ffff8880065cb020 (&n->lock#2){++--}-{2:2}, \ at: tipc_node_add_conn.cold.76+0xaa/0x211 [tipc] but task is already holding lock: ffff8880065cd020 (&n->lock#2){++--}-{2:2}, \ at: tipc_node_xmit+0x285/0xb30 [tipc] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&n->lock#2); lock(&n->lock#2); *** DEADLOCK *** May be due to missing lock nesting notation 4 locks held by conn_server/1086: #0: ffff8880036d1e40 (sk_lock-AF_TIPC){+.+.}-{0:0}, \ at: tipc_accept+0x9c0/0x10b0 [tipc] #1: ffff8880036d5f80 (sk_lock-AF_TIPC/1){+.+.}-{0:0}, \ at: tipc_accept+0x363/0x10b0 [tipc] #2: ffff8880065cd020 (&n->lock#2){++--}-{2:2}, \ at: tipc_node_xmit+0x285/0xb30 [tipc] #3: ffff888012e13370 (slock-AF_TIPC){+...}-{2:2}, \ at: tipc_sk_rcv+0x2da/0x1b40 [tipc] Call Trace: dump_stack_lvl+0x44/0x5b __lock_acquire.cold.77+0x1f2/0x3d7 lock_acquire+0x1d2/0x610 _raw_write_lock_bh+0x38/0x80 tipc_node_add_conn.cold.76+0xaa/0x211 [tipc] tipc_sk_finish_conn+0x21e/0x640 [tipc] tipc_sk_filter_rcv+0x147b/0x3030 [tipc] tipc_sk_rcv+0xbb4/0x1b40 [tipc] tipc_lxc_xmit+0x225/0x26b [tipc] tipc_node_xmit.cold.82+0x4a/0x102 [tipc] __tipc_sendstream+0x879/0xff0 [tipc] tipc_accept+0x966/0x10b0 [tipc] do_accept+0x37d/0x590 This patch avoids this warning by not holding the 'node rw lock' while calling tipc_lxc_xmit(). To protect 'peer_net', rcu_read_lock() should be enough, because cleanup_net() calls synchronize_rcu() before it continues freeing the netns. Also, since tipc_lxc_xmit() is like the RX path in tipc_rcv(), it makes sense to call it under rcu_read_lock(). 
Note that the right lock order must be: rcu_read_lock(); tipc_node_read_lock(n); tipc_node_read_unlock(n); tipc_lxc_xmit(); rcu_read_unlock(); instead of: tipc_node_read_lock(n); rcu_read_lock(); tipc_node_read_unlock(n); tipc_lxc_xmit(); rcu_read_unlock(); and we have to call tipc_node_read_lock/unlock() twice in tipc_node_xmit(). Fixes: f73b12812a3d ("tipc: improve throughput between nodes in netns") Reported-by: Shuang Li Signed-off-by: Xin Long Link: https://lore.kernel.org/r/5bdd1f8fee9db695cfff4528a48c9b9d0523fb00.1670110641.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/tipc/node.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 60059827563a..7589f2ac6fd0 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1660,6 +1660,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, struct tipc_node *n; struct sk_buff_head xmitq; bool node_up = false; + struct net *peer_net; int bearer_id; int rc; @@ -1676,18 +1677,23 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, return -EHOSTUNREACH; } + rcu_read_lock(); tipc_node_read_lock(n); node_up = node_is_up(n); - if (node_up && n->peer_net && check_net(n->peer_net)) { + peer_net = n->peer_net; + tipc_node_read_unlock(n); + if (node_up && peer_net && check_net(peer_net)) { /* xmit inner linux container */ - tipc_lxc_xmit(n->peer_net, list); + tipc_lxc_xmit(peer_net, list); if (likely(skb_queue_empty(list))) { - tipc_node_read_unlock(n); + rcu_read_unlock(); tipc_node_put(n); return 0; } } + rcu_read_unlock(); + tipc_node_read_lock(n); bearer_id = n->active_links[selector & 1]; if (unlikely(bearer_id == INVALID_BEARER_ID)) { tipc_node_read_unlock(n); From 87277bdf2c370ab2d07cfe77dfa9b37f82bbe1e5 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Sun, 4 Dec 2022 14:09:08 +0800 Subject: [PATCH 1284/1477] ethernet: aeroflex: fix potential skb leak in greth_init_rings() [ Upstream 
commit 063a932b64db3317ec020c94466fe52923a15f60 ] The greth_init_rings() function won't free the newly allocated skb when dma_mapping_error() returns error, so add dev_kfree_skb() to fix it. Compile tested only. Fixes: d4c41139df6e ("net: Add Aeroflex Gaisler 10/100/1G Ethernet MAC driver") Signed-off-by: Zhang Changzhong Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/1670134149-29516-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/aeroflex/greth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index f4f50b3a472e..0d56cb4f5dd9 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -258,6 +258,7 @@ static int greth_init_rings(struct greth_private *greth) if (dma_mapping_error(greth->dev, dma_addr)) { if (netif_msg_ifup(greth)) dev_err(greth->dev, "Could not create initial DMA mapping\n"); + dev_kfree_skb(skb); goto cleanup; } greth->rx_skbuff[i] = skb; From a00444e25bbc3ff90314ebc72e9b4952b12211d9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 7 Dec 2022 08:19:38 +0100 Subject: [PATCH 1285/1477] xen/netback: fix build warning [ Upstream commit 7dfa764e0223a324366a2a1fc056d4d9d4e95491 ] Commit ad7f402ae4f4 ("xen/netback: Ensure protocol headers don't fall in the non-linear area") introduced a (valid) build warning. There have even been reports of this problem breaking networking of Xen guests. 
Fixes: ad7f402ae4f4 ("xen/netback: Ensure protocol headers don't fall in the non-linear area") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Ross Lagerwall Tested-by: Jason Andryuk Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/net/xen-netback/netback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index fed0f7458e18..f9373a88cf37 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -530,7 +530,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, const bool sharedslot = nr_frags && frag_get_pending_idx(&shinfo->frags[0]) == copy_pending_idx(skb, copy_count(skb) - 1); - int i, err; + int i, err = 0; for (i = 0; i < copy_count(skb); i++) { int newerr; From 3d59adad126d0f22e06506449f530fcc16277e61 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 7 Dec 2022 09:53:10 +0800 Subject: [PATCH 1286/1477] net: plip: don't call kfree_skb/dev_kfree_skb() under spin_lock_irq() [ Upstream commit 7d8c19bfc8ff3f78e5337107ca9246327fcb6b45 ] It is not allowed to call kfree_skb() or consume_skb() from hardware interrupt context or with interrupts being disabled. So replace kfree_skb/dev_kfree_skb() with dev_kfree_skb_irq() and dev_consume_skb_irq() under spin_lock_irq(). 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yang Yingliang Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20221207015310.2984909-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/plip/plip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index 5a0e5a8a8917..22f7db87ed21 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -444,12 +444,12 @@ plip_bh_timeout_error(struct net_device *dev, struct net_local *nl, } rcv->state = PLIP_PK_DONE; if (rcv->skb) { - kfree_skb(rcv->skb); + dev_kfree_skb_irq(rcv->skb); rcv->skb = NULL; } snd->state = PLIP_PK_DONE; if (snd->skb) { - dev_kfree_skb(snd->skb); + dev_consume_skb_irq(snd->skb); snd->skb = NULL; } spin_unlock_irq(&nl->lock); From 8208d7e56b1e579320b9ff3712739ad2e63e1f86 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Dec 2022 10:13:51 +0000 Subject: [PATCH 1287/1477] ipv6: avoid use-after-free in ip6_fragment() [ Upstream commit 803e84867de59a1e5d126666d25eb4860cfd2ebe ] Blamed commit claimed rcu_read_lock() was held by ip6_fragment() callers. It seems to not be always true, at least for UDP stack. 
syzbot reported: BUG: KASAN: use-after-free in ip6_dst_idev include/net/ip6_fib.h:245 [inline] BUG: KASAN: use-after-free in ip6_fragment+0x2724/0x2770 net/ipv6/ip6_output.c:951 Read of size 8 at addr ffff88801d403e80 by task syz-executor.3/7618 CPU: 1 PID: 7618 Comm: syz-executor.3 Not tainted 6.1.0-rc6-syzkaller-00012-g4312098baf37 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:284 [inline] print_report+0x15e/0x45d mm/kasan/report.c:395 kasan_report+0xbf/0x1f0 mm/kasan/report.c:495 ip6_dst_idev include/net/ip6_fib.h:245 [inline] ip6_fragment+0x2724/0x2770 net/ipv6/ip6_output.c:951 __ip6_finish_output net/ipv6/ip6_output.c:193 [inline] ip6_finish_output+0x9a3/0x1170 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip6_output+0x1f1/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:445 [inline] ip6_local_out+0xb3/0x1a0 net/ipv6/output_core.c:161 ip6_send_skb+0xbb/0x340 net/ipv6/ip6_output.c:1966 udp_v6_send_skb+0x82a/0x18a0 net/ipv6/udp.c:1286 udp_v6_push_pending_frames+0x140/0x200 net/ipv6/udp.c:1313 udpv6_sendmsg+0x18da/0x2c80 net/ipv6/udp.c:1606 inet6_sendmsg+0x9d/0xe0 net/ipv6/af_inet6.c:665 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 sock_write_iter+0x295/0x3d0 net/socket.c:1108 call_write_iter include/linux/fs.h:2191 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9ed/0xdd0 fs/read_write.c:584 ksys_write+0x1ec/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fde3588c0d9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff 
ff f7 d8 64 89 01 48 RSP: 002b:00007fde365b6168 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007fde359ac050 RCX: 00007fde3588c0d9 RDX: 000000000000ffdc RSI: 00000000200000c0 RDI: 000000000000000a RBP: 00007fde358e7ae9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fde35acfb1f R14: 00007fde365b6300 R15: 0000000000022000 Allocated by task 7618: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 __kasan_slab_alloc+0x82/0x90 mm/kasan/common.c:325 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slab.h:737 [inline] slab_alloc_node mm/slub.c:3398 [inline] slab_alloc mm/slub.c:3406 [inline] __kmem_cache_alloc_lru mm/slub.c:3413 [inline] kmem_cache_alloc+0x2b4/0x3d0 mm/slub.c:3422 dst_alloc+0x14a/0x1f0 net/core/dst.c:92 ip6_dst_alloc+0x32/0xa0 net/ipv6/route.c:344 ip6_rt_pcpu_alloc net/ipv6/route.c:1369 [inline] rt6_make_pcpu_route net/ipv6/route.c:1417 [inline] ip6_pol_route+0x901/0x1190 net/ipv6/route.c:2254 pol_lookup_func include/net/ip6_fib.h:582 [inline] fib6_rule_lookup+0x52e/0x6f0 net/ipv6/fib6_rules.c:121 ip6_route_output_flags_noref+0x2e6/0x380 net/ipv6/route.c:2625 ip6_route_output_flags+0x76/0x320 net/ipv6/route.c:2638 ip6_route_output include/net/ip6_route.h:98 [inline] ip6_dst_lookup_tail+0x5ab/0x1620 net/ipv6/ip6_output.c:1092 ip6_dst_lookup_flow+0x90/0x1d0 net/ipv6/ip6_output.c:1222 ip6_sk_dst_lookup_flow+0x553/0x980 net/ipv6/ip6_output.c:1260 udpv6_sendmsg+0x151d/0x2c80 net/ipv6/udp.c:1554 inet6_sendmsg+0x9d/0xe0 net/ipv6/af_inet6.c:665 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 __sys_sendto+0x23a/0x340 net/socket.c:2117 __do_sys_sendto net/socket.c:2129 [inline] __se_sys_sendto net/socket.c:2125 [inline] __x64_sys_sendto+0xe1/0x1b0 net/socket.c:2125 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 
arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 7599: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2e/0x40 mm/kasan/generic.c:511 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x160/0x1c0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1724 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1750 slab_free mm/slub.c:3661 [inline] kmem_cache_free+0xee/0x5c0 mm/slub.c:3683 dst_destroy+0x2ea/0x400 net/core/dst.c:127 rcu_do_batch kernel/rcu/tree.c:2250 [inline] rcu_core+0x81f/0x1980 kernel/rcu/tree.c:2510 __do_softirq+0x1fb/0xadc kernel/softirq.c:571 Last potentially related work creation: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 __kasan_record_aux_stack+0xbc/0xd0 mm/kasan/generic.c:481 call_rcu+0x9d/0x820 kernel/rcu/tree.c:2798 dst_release net/core/dst.c:177 [inline] dst_release+0x7d/0xe0 net/core/dst.c:167 refdst_drop include/net/dst.h:256 [inline] skb_dst_drop include/net/dst.h:268 [inline] skb_release_head_state+0x250/0x2a0 net/core/skbuff.c:838 skb_release_all net/core/skbuff.c:852 [inline] __kfree_skb net/core/skbuff.c:868 [inline] kfree_skb_reason+0x151/0x4b0 net/core/skbuff.c:891 kfree_skb_list_reason+0x4b/0x70 net/core/skbuff.c:901 kfree_skb_list include/linux/skbuff.h:1227 [inline] ip6_fragment+0x2026/0x2770 net/ipv6/ip6_output.c:949 __ip6_finish_output net/ipv6/ip6_output.c:193 [inline] ip6_finish_output+0x9a3/0x1170 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip6_output+0x1f1/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:445 [inline] ip6_local_out+0xb3/0x1a0 net/ipv6/output_core.c:161 ip6_send_skb+0xbb/0x340 net/ipv6/ip6_output.c:1966 udp_v6_send_skb+0x82a/0x18a0 net/ipv6/udp.c:1286 udp_v6_push_pending_frames+0x140/0x200 net/ipv6/udp.c:1313 udpv6_sendmsg+0x18da/0x2c80 net/ipv6/udp.c:1606 inet6_sendmsg+0x9d/0xe0 
net/ipv6/af_inet6.c:665 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 sock_write_iter+0x295/0x3d0 net/socket.c:1108 call_write_iter include/linux/fs.h:2191 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9ed/0xdd0 fs/read_write.c:584 ksys_write+0x1ec/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Second to last potentially related work creation: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 __kasan_record_aux_stack+0xbc/0xd0 mm/kasan/generic.c:481 call_rcu+0x9d/0x820 kernel/rcu/tree.c:2798 dst_release net/core/dst.c:177 [inline] dst_release+0x7d/0xe0 net/core/dst.c:167 refdst_drop include/net/dst.h:256 [inline] skb_dst_drop include/net/dst.h:268 [inline] __dev_queue_xmit+0x1b9d/0x3ba0 net/core/dev.c:4211 dev_queue_xmit include/linux/netdevice.h:3008 [inline] neigh_resolve_output net/core/neighbour.c:1552 [inline] neigh_resolve_output+0x51b/0x840 net/core/neighbour.c:1532 neigh_output include/net/neighbour.h:546 [inline] ip6_finish_output2+0x56c/0x1530 net/ipv6/ip6_output.c:134 __ip6_finish_output net/ipv6/ip6_output.c:195 [inline] ip6_finish_output+0x694/0x1170 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip6_output+0x1f1/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:445 [inline] NF_HOOK include/linux/netfilter.h:302 [inline] NF_HOOK include/linux/netfilter.h:296 [inline] mld_sendpack+0xa09/0xe70 net/ipv6/mcast.c:1820 mld_send_cr net/ipv6/mcast.c:2121 [inline] mld_ifc_work+0x720/0xdc0 net/ipv6/mcast.c:2653 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x669/0x1090 kernel/workqueue.c:2436 kthread+0x2e8/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 The buggy address belongs to the object at ffff88801d403dc0 which belongs to the cache ip6_dst_cache of size 240 The buggy address is 
located 192 bytes inside of 240-byte region [ffff88801d403dc0, ffff88801d403eb0) The buggy address belongs to the physical page: page:ffffea00007500c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1d403 memcg:ffff888022f49c81 flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 ffffea0001ef6580 dead000000000002 ffff88814addf640 raw: 0000000000000000 00000000800c000c 00000001ffffffff ffff888022f49c81 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_HARDWALL), pid 3719, tgid 3719 (kworker/0:6), ts 136223432244, free_ts 136222971441 prep_new_page mm/page_alloc.c:2539 [inline] get_page_from_freelist+0x10b5/0x2d50 mm/page_alloc.c:4288 __alloc_pages+0x1cb/0x5b0 mm/page_alloc.c:5555 alloc_pages+0x1aa/0x270 mm/mempolicy.c:2285 alloc_slab_page mm/slub.c:1794 [inline] allocate_slab+0x213/0x300 mm/slub.c:1939 new_slab mm/slub.c:1992 [inline] ___slab_alloc+0xa91/0x1400 mm/slub.c:3180 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3279 slab_alloc_node mm/slub.c:3364 [inline] slab_alloc mm/slub.c:3406 [inline] __kmem_cache_alloc_lru mm/slub.c:3413 [inline] kmem_cache_alloc+0x31a/0x3d0 mm/slub.c:3422 dst_alloc+0x14a/0x1f0 net/core/dst.c:92 ip6_dst_alloc+0x32/0xa0 net/ipv6/route.c:344 icmp6_dst_alloc+0x71/0x680 net/ipv6/route.c:3261 mld_sendpack+0x5de/0xe70 net/ipv6/mcast.c:1809 mld_send_cr net/ipv6/mcast.c:2121 [inline] mld_ifc_work+0x720/0xdc0 net/ipv6/mcast.c:2653 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x669/0x1090 kernel/workqueue.c:2436 kthread+0x2e8/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1459 [inline] free_pcp_prepare+0x65c/0xd90 mm/page_alloc.c:1509 free_unref_page_prepare 
mm/page_alloc.c:3387 [inline] free_unref_page+0x1d/0x4d0 mm/page_alloc.c:3483 __unfreeze_partials+0x17c/0x1a0 mm/slub.c:2586 qlink_free mm/kasan/quarantine.c:168 [inline] qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187 kasan_quarantine_reduce+0x184/0x210 mm/kasan/quarantine.c:294 __kasan_slab_alloc+0x66/0x90 mm/kasan/common.c:302 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slab.h:737 [inline] slab_alloc_node mm/slub.c:3398 [inline] kmem_cache_alloc_node+0x304/0x410 mm/slub.c:3443 __alloc_skb+0x214/0x300 net/core/skbuff.c:497 alloc_skb include/linux/skbuff.h:1267 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1191 [inline] netlink_sendmsg+0x9a6/0xe10 net/netlink/af_netlink.c:1896 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 __sys_sendto+0x23a/0x340 net/socket.c:2117 __do_sys_sendto net/socket.c:2129 [inline] __se_sys_sendto net/socket.c:2125 [inline] __x64_sys_sendto+0xe1/0x1b0 net/socket.c:2125 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 1758fd4688eb ("ipv6: remove unnecessary dst_hold() in ip6_fragment()") Reported-by: syzbot+8c0ac31aa9681abb9e2d@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Wei Wang Cc: Martin KaFai Lau Link: https://lore.kernel.org/r/20221206101351.2037285-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/ip6_output.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fadad8e83521..e427f5040a08 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -919,6 +919,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (err < 0) goto fail; + /* We prevent @rt from being freed. */ + rcu_read_lock(); + for (;;) { /* Prepare header of the next frame, * before previous one went down. 
*/ @@ -942,6 +945,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); + rcu_read_unlock(); return 0; } @@ -949,6 +953,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); + rcu_read_unlock(); return err; slow_path_clean: From 6b03e41767c7125d36c2fc4b59dd3ccc5da0738e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 7 Dec 2022 10:06:31 +0300 Subject: [PATCH 1288/1477] net: mvneta: Fix an out of bounds check [ Upstream commit cdd97383e19d4afe29adc3376025a15ae3bab3a3 ] In an earlier commit, I added a bounds check to prevent an out of bounds read and a WARN(). On further discussion and consideration that check was probably too aggressive. Instead of returning -EINVAL, a better fix would be to just prevent the out of bounds read but continue the process. Background: The value of "pp->rxq_def" is a number between 0-7 by default, or even higher depending on the value of "rxq_number", which is a module parameter. If the value is more than the number of available CPUs then it will trigger the WARN() in cpu_max_bits_warn(). 
Fixes: e8b4fc13900b ("net: mvneta: Prevent out of bounds read in mvneta_config_rss()") Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/Y5A7d1E5ccwHTYPf@kadam Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvneta.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 6bfa0ac27be3..f5567d485e91 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4140,7 +4140,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) /* Use the cpu associated to the rxq when it is online, in all * the other cases, use the cpu 0 which can't be offline. */ - if (cpu_online(pp->rxq_def)) + if (pp->rxq_def < nr_cpu_ids && cpu_online(pp->rxq_def)) elected_cpu = pp->rxq_def; max_cpu = num_present_cpus(); @@ -4767,9 +4767,6 @@ static int mvneta_config_rss(struct mvneta_port *pp) napi_disable(&pp->napi); } - if (pp->indir[0] >= nr_cpu_ids) - return -EINVAL; - pp->rxq_def = pp->indir[0]; /* Update unicast mapping */ From cf0e42310648a23188a411f7287dd95599086fce Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Wed, 7 Dec 2022 12:16:18 +0200 Subject: [PATCH 1289/1477] macsec: add missing attribute validation for offload [ Upstream commit 38099024e51ee37dee5f0f577ca37175c932e3f7 ] Add missing attribute validation for IFLA_MACSEC_OFFLOAD to the netlink policy. 
Fixes: 791bb3fcafce ("net: macsec: add support for specifying offload upon link creation") Signed-off-by: Emeel Hakim Reviewed-by: Jiri Pirko Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20221207101618.989-1-ehakim@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3e564158c401..eb029456b594 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3680,6 +3680,7 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCB] = { .type = NLA_U8 }, [IFLA_MACSEC_REPLAY_PROTECT] = { .type = NLA_U8 }, [IFLA_MACSEC_VALIDATION] = { .type = NLA_U8 }, + [IFLA_MACSEC_OFFLOAD] = { .type = NLA_U8 }, }; static void macsec_free_netdev(struct net_device *dev) From 4fd6f84e0a0c432d1acc56b7ae76e7676587081d Mon Sep 17 00:00:00 2001 From: Frank Jungclaus Date: Wed, 30 Nov 2022 21:22:42 +0100 Subject: [PATCH 1290/1477] can: esd_usb: Allow REC and TEC to return to zero [ Upstream commit 918ee4911f7a41fb4505dff877c1d7f9f64eb43e ] We don't get any further EVENT from an esd CAN USB device for changes on REC or TEC while those counters converge to 0 (with ecc == 0). So when handling the "Back to Error Active"-event force txerr = rxerr = 0, otherwise the berr-counters might stay on values like 95 forever. Also, to make life easier during the ongoing development a netdev_dbg() has been introduced to allow dumping error events send by an esd CAN USB device. 
Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Signed-off-by: Frank Jungclaus Link: https://lore.kernel.org/all/20221130202242.3998219-2-frank.jungclaus@esd.eu Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/usb/esd_usb2.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 8847942a8d97..73c5343e609b 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -227,6 +227,10 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, u8 rxerr = msg->msg.rx.data[2]; u8 txerr = msg->msg.rx.data[3]; + netdev_dbg(priv->netdev, + "CAN_ERR_EV_EXT: dlc=%#02x state=%02x ecc=%02x rec=%02x tec=%02x\n", + msg->msg.rx.dlc, state, ecc, rxerr, txerr); + skb = alloc_can_err_skb(priv->netdev, &cf); if (skb == NULL) { stats->rx_dropped++; @@ -253,6 +257,8 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, break; default: priv->can.state = CAN_STATE_ERROR_ACTIVE; + txerr = 0; + rxerr = 0; break; } } else { From 931578be69875087a62524da69964d575426d287 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 14 Dec 2022 11:32:05 +0100 Subject: [PATCH 1291/1477] Linux 5.10.159 Link: https://lore.kernel.org/r/20221212130924.863767275@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Jon Hunter Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Guenter Roeck Tested-by: Salvatore Bonaccorso Tested-by: Sudip Mukherjee Link: https://lore.kernel.org/r/20221213150409.357752716@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Florian Fainelli Tested-by: Jon Hunter Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f3d1f07b6a6f..bb9fab281555 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # 
SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 158 +SUBLEVEL = 159 EXTRAVERSION = NAME = Dare mighty things From d31626cbea6182ab3c0dd616e0afb96e0b73b28d Mon Sep 17 00:00:00 2001 From: Dan Vacura Date: Thu, 8 Dec 2022 15:30:21 -0600 Subject: [PATCH 1292/1477] ANDROID: usb: gadget: uvc: remove duplicate code in unbind The uvc_function_unbind() was calling the same code two times, increasing a timeout that may occur. The duplicate code looks to have come in during the merge of 5.10.117. Remove the duplicate code. Bug: 261895714 Change-Id: I8957048bfad4a9e01baea033de9b628362b2d991 Signed-off-by: Dan Vacura --- drivers/usb/gadget/function/f_uvc.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 607ea0c1bfb3..1fc00cce83fb 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -906,18 +906,6 @@ static void uvc_function_unbind(struct usb_configuration *c, uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret); } - /* If we know we're connected via v4l2, then there should be a cleanup - * of the device from userspace either via UVC_EVENT_DISCONNECT or - * though the video device removal uevent. Allow some time for the - * application to close out before things get deleted. - */ - if (uvc->func_connected) { - uvcg_dbg(f, "waiting for clean disconnect\n"); - wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue, - uvc->func_connected == false, msecs_to_jiffies(500)); - uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret); - } - device_remove_file(&uvc->vdev.dev, &dev_attr_function_name); video_unregister_device(&uvc->vdev); v4l2_device_unregister(&uvc->v4l2_dev); From 970862a96c0d157cbad044406e0062674857d1a8 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 20 Oct 2020 21:13:55 -0700 Subject: [PATCH 1293/1477] x86/smpboot: Move rcu_cpu_starting() earlier commit 29368e09392123800e5e2bf0f3eda91f16972e52 upstream. The call to rcu_cpu_starting() in mtrr_ap_init() is not early enough in the CPU-hotplug onlining process, which results in lockdep splats as follows: ============================= WARNING: suspicious RCU usage 5.9.0+ #268 Not tainted ----------------------------- kernel/kprobes.c:300 RCU-list traversed in non-reader section!! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/1/0. stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0+ #268 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: dump_stack+0x77/0x97 __is_insn_slot_addr+0x15d/0x170 kernel_text_address+0xba/0xe0 ? get_stack_info+0x22/0xa0 __kernel_text_address+0x9/0x30 show_trace_log_lvl+0x17d/0x380 ? dump_stack+0x77/0x97 dump_stack+0x77/0x97 __lock_acquire+0xdf7/0x1bf0 lock_acquire+0x258/0x3d0 ? vprintk_emit+0x6d/0x2c0 _raw_spin_lock+0x27/0x40 ? vprintk_emit+0x6d/0x2c0 vprintk_emit+0x6d/0x2c0 printk+0x4d/0x69 start_secondary+0x1c/0x100 secondary_startup_64_no_verify+0xb8/0xbb This is avoided by moving the call to rcu_cpu_starting up near the beginning of the start_secondary() function. Note that the raw_smp_processor_id() is required in order to avoid calling into lockdep before RCU has declared the CPU to be watched for readers. Link: https://lore.kernel.org/lkml/160223032121.7002.1269740091547117869.tip-bot2@tip-bot2/ Reported-by: Qian Cai Suggested-by: Peter Zijlstra Signed-off-by: Paul E. 
McKenney Cc: Joel Fernandes Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mtrr/mtrr.c | 2 -- arch/x86/kernel/smpboot.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 6a80f36b5d59..5f436cb4f7c4 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -794,8 +794,6 @@ void mtrr_ap_init(void) if (!use_intel() || mtrr_aps_delayed_init) return; - rcu_cpu_starting(smp_processor_id()); - /* * Ideally we should hold mtrr_mutex here to avoid mtrr entries * changed, but this routine will be called in cpu boot time, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8baff500914e..e8e5515fb7e9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -229,6 +229,7 @@ static void notrace start_secondary(void *unused) #endif cpu_init_exception_handling(); cpu_init(); + rcu_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); smp_callin(); From ed9673394979b7a5dff10ba878178054625beda9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 13 Dec 2022 15:13:40 +0200 Subject: [PATCH 1294/1477] vfs: fix copy_file_range() regression in cross-fs copies commit 868f9f2f8e004bfe0d3935b1976f625b2924893b upstream. [backport comments for pre v5.15: - This commit has a bug fixed by commit 10bc8e4af659 ("vfs: fix copy_file_range() averts filesystem freeze protection") - ksmbd mentions are irrelevant - ksmbd hunks were dropped ] A regression has been reported by Nicolas Boichat, found while using the copy_file_range syscall to copy a tracefs file. Before commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") the kernel would return -EXDEV to userspace when trying to copy a file across different filesystems. After this commit, the syscall doesn't fail anymore and instead returns zero (zero bytes copied), as this file's content is generated on-the-fly and thus reports a size of zero. 
Another regression has been reported by He Zhe - the assertion of WARN_ON_ONCE(ret == -EOPNOTSUPP) can be triggered from userspace when copying from a sysfs file whose read operation may return -EOPNOTSUPP. Since we do not have test coverage for copy_file_range() between any two types of filesystems, the best way to avoid these sorts of issues in the future is for the kernel to be more picky about filesystems that are allowed to do copy_file_range(). This patch restores some cross-filesystem copy restrictions that existed prior to commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices"), namely, cross-sb copy is not allowed for filesystems that do not implement ->copy_file_range(). Filesystems that do implement ->copy_file_range() have full control of the result - if this method returns an error, the error is returned to the user. Before this change this was only true for fs that did not implement the ->remap_file_range() operation (i.e. nfsv3). Filesystems that do not implement ->copy_file_range() still fall back to the generic_copy_file_range() implementation when the copy is within the same sb. This helps the kernel maintain a more consistent story about which filesystems support copy_file_range(). nfsd and ksmbd servers are modified to fall back to the generic_copy_file_range() implementation in case vfs_copy_file_range() fails with -EOPNOTSUPP or -EXDEV, which preserves behavior of server-side-copy. Fall-back to generic_copy_file_range() is not implemented for the smb operation FSCTL_DUPLICATE_EXTENTS_TO_FILE, which is arguably a correct change of behavior. 
Fixes: 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") Link: https://lore.kernel.org/linux-fsdevel/20210212044405.4120619-1-drinkcat@chromium.org/ Link: https://lore.kernel.org/linux-fsdevel/CANMq1KDZuxir2LM5jOTm0xx+BnvW=ZmpsG47CyHFJwnw7zSX6Q@mail.gmail.com/ Link: https://lore.kernel.org/linux-fsdevel/20210126135012.1.If45b7cdc3ff707bc1efa17f5366057d60603c45f@changeid/ Link: https://lore.kernel.org/linux-fsdevel/20210630161320.29006-1-lhenriques@suse.de/ Reported-by: Nicolas Boichat Reported-by: kernel test robot Signed-off-by: Luis Henriques Fixes: 64bf5ff58dff ("vfs: no fallback for ->copy_file_range") Link: https://lore.kernel.org/linux-fsdevel/20f17f64-88cb-4e80-07c1-85cb96c83619@windriver.com/ Reported-by: He Zhe Tested-by: Namjae Jeon Tested-by: Luis Henriques Signed-off-by: Amir Goldstein Signed-off-by: Linus Torvalds Link: https://bugzilla.kernel.org/show_bug.cgi?id=216800 Signed-off-by: Amir Goldstein Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 8 ++++- fs/read_write.c | 77 ++++++++++++++++++++++++++++--------------------- 2 files changed, 51 insertions(+), 34 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a4ae1fcd2ab1..745f0663b5b6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -570,6 +570,7 @@ out_err: ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { + ssize_t ret; /* * Limit copy to 4MB to prevent indefinitely blocking an nfsd @@ -580,7 +581,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, * limit like this and pipeline multiple COPY requests. 
*/ count = min_t(u64, count, 1 << 22); - return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src, src_pos, dst, dst_pos, + count, 0); + return ret; } __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, diff --git a/fs/read_write.c b/fs/read_write.c index 75f764b43418..68604b3c3994 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1388,28 +1388,6 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(generic_copy_file_range); -static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags) -{ - /* - * Although we now allow filesystems to handle cross sb copy, passing - * a file of the wrong filesystem type to filesystem driver can result - * in an attempt to dereference the wrong type of ->private_data, so - * avoid doing that until we really have a good reason. NFS defines - * several different file_system_type structures, but they all end up - * using the same ->copy_file_range() function pointer. - */ - if (file_out->f_op->copy_file_range && - file_out->f_op->copy_file_range == file_in->f_op->copy_file_range) - return file_out->f_op->copy_file_range(file_in, pos_in, - file_out, pos_out, - len, flags); - - return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, - flags); -} - /* * Performs necessary checks before doing a file copy * @@ -1431,6 +1409,24 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, if (ret) return ret; + /* + * We allow some filesystems to handle cross sb copy, but passing + * a file of the wrong filesystem type to filesystem driver can result + * in an attempt to dereference the wrong type of ->private_data, so + * avoid doing that until we really have a good reason. 
+ * + * nfs and cifs define several different file_system_type structures + * and several different sets of file_operations, but they all end up + * using the same ->copy_file_range() function pointer. + */ + if (file_out->f_op->copy_file_range) { + if (file_in->f_op->copy_file_range != + file_out->f_op->copy_file_range) + return -EXDEV; + } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { + return -EXDEV; + } + /* Don't touch certain kinds of inodes */ if (IS_IMMUTABLE(inode_out)) return -EPERM; @@ -1496,26 +1492,41 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, file_start_write(file_out); /* - * Try cloning first, this is supported by more file systems, and - * more efficient if both clone and copy are supported (e.g. NFS). + * Cloning is supported by more file systems, so we implement copy on + * same sb using clone, but for filesystems where both clone and copy + * are supported (e.g. nfs,cifs), we only call the copy method. */ + if (file_out->f_op->copy_file_range) { + ret = file_out->f_op->copy_file_range(file_in, pos_in, + file_out, pos_out, + len, flags); + goto done; + } + if (file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { - loff_t cloned; - - cloned = file_in->f_op->remap_file_range(file_in, pos_in, + ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, min_t(loff_t, MAX_RW_COUNT, len), REMAP_FILE_CAN_SHORTEN); - if (cloned > 0) { - ret = cloned; + if (ret > 0) goto done; - } } - ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, - flags); - WARN_ON_ONCE(ret == -EOPNOTSUPP); + /* + * We can get here for same sb copy of filesystems that do not implement + * ->copy_file_range() in case filesystem does not support clone or in + * case filesystem supports clone but rejected the clone request (e.g. + * because it was not block aligned). 
+ * + * In both cases, fall back to kernel copy so we are able to maintain a + * consistent story about which filesystems support copy_file_range() + * and which filesystems do not, that will allow userspace tools to + * make consistent desicions w.r.t using copy_file_range(). + */ + ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); + done: if (ret > 0) { fsnotify_access(file_in); From 965d93fb39b99348d6c327853afd4708b610e132 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 13 Dec 2022 15:13:41 +0200 Subject: [PATCH 1295/1477] vfs: fix copy_file_range() averts filesystem freeze protection commit 10bc8e4af65946b727728d7479c028742321b60a upstream. [backport comments for pre v5.15: - ksmbd mentions are irrelevant - ksmbd hunks were dropped - sb_write_started() is missing - assert was dropped ] Commit 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs copies") removed fallback to generic_copy_file_range() for cross-fs cases inside vfs_copy_file_range(). To preserve behavior of nfsd and ksmbd server-side-copy, the fallback to generic_copy_file_range() was added in nfsd and ksmbd code, but that call is missing sb_start_write(), fsnotify hooks and more. Ideally, nfsd and ksmbd would pass a flag to vfs_copy_file_range() that will take care of the fallback, but that code would be subtle and we got vfs_copy_file_range() logic wrong too many times already. Instead, add a flag to explicitly request vfs_copy_file_range() to perform only generic_copy_file_range() and let nfsd and ksmbd use this flag only in the fallback path. This choice keeps the logic changes to a minimum in the non-nfsd/ksmbd code paths to reduce the risk of further regressions. 
Fixes: 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs copies") Tested-by: Namjae Jeon Tested-by: Luis Henriques Signed-off-by: Amir Goldstein Signed-off-by: Al Viro Signed-off-by: Amir Goldstein Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 4 ++-- fs/read_write.c | 17 +++++++++++++---- include/linux/fs.h | 8 ++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 745f0663b5b6..b09ead06a249 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -584,8 +584,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src, src_pos, dst, dst_pos, - count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, + COPY_FILE_SPLICE); return ret; } diff --git a/fs/read_write.c b/fs/read_write.c index 68604b3c3994..0066acb6b380 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1419,7 +1419,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, * and several different sets of file_operations, but they all end up * using the same ->copy_file_range() function pointer. */ - if (file_out->f_op->copy_file_range) { + if (flags & COPY_FILE_SPLICE) { + /* cross sb splice is allowed */ + } else if (file_out->f_op->copy_file_range) { if (file_in->f_op->copy_file_range != file_out->f_op->copy_file_range) return -EXDEV; @@ -1469,8 +1471,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, size_t len, unsigned int flags) { ssize_t ret; + bool splice = flags & COPY_FILE_SPLICE; - if (flags != 0) + if (flags & ~COPY_FILE_SPLICE) return -EINVAL; ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, @@ -1496,14 +1499,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * same sb using clone, but for filesystems where both clone and copy * are supported (e.g. 
nfs,cifs), we only call the copy method. */ - if (file_out->f_op->copy_file_range) { + if (!splice && file_out->f_op->copy_file_range) { ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); goto done; } - if (file_in->f_op->remap_file_range && + if (!splice && file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, @@ -1523,6 +1526,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * consistent story about which filesystems support copy_file_range() * and which filesystems do not, that will allow userspace tools to * make consistent desicions w.r.t using copy_file_range(). + * + * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE. */ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); @@ -1577,6 +1582,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, pos_out = f_out.file->f_pos; } + ret = -EINVAL; + if (flags != 0) + goto out; + ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, flags); if (ret > 0) { diff --git a/include/linux/fs.h b/include/linux/fs.h index df54acdd3554..ebfc0b2b4969 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1817,6 +1817,14 @@ struct dir_context { */ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) +/* + * These flags control the behavior of vfs_copy_file_range(). + * They are not available to the user via syscall. + * + * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops + */ +#define COPY_FILE_SPLICE (1 << 0) + struct iov_iter; struct file_operations { From eb6313c12955c58c3d3d40f086c22e44ca1c9a1b Mon Sep 17 00:00:00 2001 From: Jialiang Wang Date: Wed, 10 Aug 2022 15:30:57 +0800 Subject: [PATCH 1296/1477] nfp: fix use-after-free in area_cache_get() commit 02e1a114fdb71e59ee6770294166c30d437bf86a upstream. 
area_cache_get() is used to distribute cache->area and set cache->id, and if cache->id is not 0 and cache->area->kref refcount is 0, it will release the cache->area by nfp_cpp_area_release(). area_cache_get() sets cache->id before cpp->op->area_init() and nfp_cpp_area_acquire(). But if area_init() or nfp_cpp_area_acquire() fails, the cache->id is already set but the refcount is not increased as expected. At this time, calling the nfp_cpp_area_release() will cause use-after-free. To avoid the use-after-free, set cache->id after area_init() and nfp_cpp_area_acquire() complete successfully. Note: This vulnerability is triggerable by providing an emulated device equipped with a specific configuration. BUG: KASAN: use-after-free in nfp6000_area_init (drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c:760) Write of size 4 at addr ffff888005b7f4a0 by task swapper/0/1 Call Trace: nfp6000_area_init (drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c:760) area_cache_get.constprop.8 (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:884) Allocated by task 1: nfp_cpp_area_alloc_with_name (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:303) nfp_cpp_area_cache_add (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:802) nfp6000_init (drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c:1230) nfp_cpp_from_operations (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:1215) nfp_pci_probe (drivers/net/ethernet/netronome/nfp/nfp_main.c:744) Freed by task 1: kfree (mm/slub.c:4562) area_cache_get.constprop.8 (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:873) nfp_cpp_read (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:924 drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:973) nfp_cpp_readl (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c:48) Signed-off-by: Jialiang Wang Reviewed-by: Yinjun Zhang Acked-by: Simon Horman Link: https://lore.kernel.org/r/20220810073057.4032-1-wangjialiang0806@163.com 
Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index 6ef48eb3a77d..b163489489e9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -874,7 +874,6 @@ area_cache_get(struct nfp_cpp *cpp, u32 id, } /* Adjust the start address to be cache size aligned */ - cache->id = id; cache->addr = addr & ~(u64)(cache->size - 1); /* Re-init to the new ID and address */ @@ -894,6 +893,8 @@ area_cache_get(struct nfp_cpp *cpp, u32 id, return NULL; } + cache->id = id; + exit: /* Adjust offset */ *offset = addr - cache->addr; From 17f386e6b7695afdb10474431dfd754c92feaedd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 11 Nov 2020 17:22:32 +0100 Subject: [PATCH 1297/1477] fuse: always revalidate if exclusive create commit df8629af293493757beccac2d3168fe5a315636e upstream. Failure to do so may result in EEXIST even if the file only exists in the cache and not in the filesystem. The atomic nature of O_EXCL mandates that the cached state should be ignored and existence verified anew. 
Reported-by: Ken Schalk Signed-off-by: Miklos Szeredi Signed-off-by: Wu Bo Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8e95a75a4559..80a9e50392a0 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -205,7 +205,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) if (inode && fuse_is_bad(inode)) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || - (flags & LOOKUP_REVAL)) { + (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) { struct fuse_entry_out outarg; FUSE_ARGS(args); struct fuse_forget_link *forget; From 75454b4bbfc7e6a4dd8338556f36ea9107ddf61a Mon Sep 17 00:00:00 2001 From: Bing-Jhong Billy Jheng Date: Thu, 15 Dec 2022 06:43:56 -0800 Subject: [PATCH 1298/1477] io_uring: add missing item types for splice request Splice is like read/write and should grab current->nsproxy, denoted by IO_WQ_WORK_FILES as it refers to current->files as well Signed-off-by: Bing-Jhong Billy Jheng Reviewed-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c5c22b067cd8..84758e512a04 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -936,7 +936,7 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, - .work_flags = IO_WQ_WORK_BLKCG, + .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FILES, }, [IORING_OP_PROVIDE_BUFFERS] = {}, [IORING_OP_REMOVE_BUFFERS] = {}, From a49c1a7307752ed5e371373f4db6a426857d4eed Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Sat, 7 May 2022 20:14:13 +0800 Subject: [PATCH 1299/1477] ASoC: fsl_micfil: explicitly clear software reset bit [ Upstream commit 292709b9cf3ba470af94b62c9bb60284cc581b79 ] SRES is self-cleared bit, but REG_MICFIL_CTRL1 is defined as non volatile register, it still remain in regmap cache after set, then every 
update of REG_MICFIL_CTRL1, software reset happens. to avoid this, clear it explicitly. Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1651925654-32060-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl_micfil.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index efc5daf53bba..ead4bfa13561 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -190,6 +190,17 @@ static int fsl_micfil_reset(struct device *dev) return ret; } + /* + * SRES is self-cleared bit, but REG_MICFIL_CTRL1 is defined + * as non-volatile register, so SRES still remain in regmap + * cache after set, that every update of REG_MICFIL_CTRL1, + * software reset happens. so clear it explicitly. + */ + ret = regmap_clear_bits(micfil->regmap, REG_MICFIL_CTRL1, + MICFIL_CTRL1_SRES); + if (ret) + return ret; + return 0; } From 344739dc56f1b3e33e6a3170b89731d450455df6 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Sat, 7 May 2022 20:14:14 +0800 Subject: [PATCH 1300/1477] ASoC: fsl_micfil: explicitly clear CHnF flags [ Upstream commit b776c4a4618ec1b5219d494c423dc142f23c4e8f ] There may be failure when start 1 channel recording after 8 channels recording. The reason is that the CHnF flags are not cleared successfully by software reset. This issue is triggerred by the change of clearing software reset bit. CHnF flags are write 1 clear bits. Clear them by force write. 
Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1651925654-32060-2-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl_micfil.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index ead4bfa13561..6c794605e33c 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -201,6 +201,14 @@ static int fsl_micfil_reset(struct device *dev) if (ret) return ret; + /* + * Set SRES should clear CHnF flags, But even add delay here + * the CHnF may not be cleared sometimes, so clear CHnF explicitly. + */ + ret = regmap_write_bits(micfil->regmap, REG_MICFIL_STAT, 0xFF, 0xFF); + if (ret) + return ret; + return 0; } From 50b5f6d4d9d2d69a7498c44fd8b26e13d73d3d98 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 May 2022 14:41:37 +0100 Subject: [PATCH 1301/1477] ASoC: ops: Check bounds for second channel in snd_soc_put_volsw_sx() [ Upstream commit 97eea946b93961fffd29448dcda7398d0d51c4b2 ] The bounds checks in snd_soc_put_volsw_sx() are only being applied to the first channel, meaning it is possible to write out of bounds values to the second channel in stereo controls. Add appropriate checks. 
Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220511134137.169575-2-broonie@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-ops.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 5fdd96e77ef3..fe93458d864a 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -447,6 +447,12 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, if (snd_soc_volsw_is_stereo(mc)) { val_mask = mask << rshift; val2 = (ucontrol->value.integer.value[1] + min) & mask; + + if (mc->platform_max && val2 > mc->platform_max) + return -EINVAL; + if (val2 > max) + return -EINVAL; + val2 = val2 << rshift; err = snd_soc_component_update_bits(component, reg2, val_mask, From 5cb4abb0caa586859e56c71a6c44882e451a980a Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 16 Nov 2022 15:23:48 +0800 Subject: [PATCH 1302/1477] libbpf: Use page size as max_entries when probing ring buffer map [ Upstream commit 689eb2f1ba46b4b02195ac2a71c55b96d619ebf8 ] Using page size as max_entries when probing ring buffer map, else the probe may fail on host with 64KB page size (e.g., an ARM64 host). After the fix, the output of "bpftool feature" on above host will be correct. 
Before : eBPF map_type ringbuf is NOT available eBPF map_type user_ringbuf is NOT available After : eBPF map_type ringbuf is available eBPF map_type user_ringbuf is available Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221116072351.1168938-2-houtao@huaweicloud.com Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf_probes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index d38284a3aaf0..13393f0eab25 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -244,7 +244,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_RINGBUF: key_size = 0; value_size = 0; - max_entries = 4096; + max_entries = sysconf(_SC_PAGE_SIZE); break; case BPF_MAP_TYPE_UNSPEC: case BPF_MAP_TYPE_HASH: From 434b5236710f40f09c52f7073dc269d2904ce232 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 22 Nov 2022 00:38:55 +0100 Subject: [PATCH 1303/1477] pinctrl: meditatek: Startup with the IRQs disabled [ Upstream commit 11780e37565db4dd064d3243ca68f755c13f65b4 ] If the system is restarted via kexec(), the peripherals do not start with a known state. If the previous system had enabled any IRQs, we will receive unexpected IRQs that can lock the system. [ 28.109251] watchdog: BUG: soft lockup - CPU#0 stuck for 26s!
[swapper/0:0] [ 28.109263] Modules linked in: [ 28.109273] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.15.79-14458-g4b9edf7b1ac6 #1 9f2e76613148af94acccd64c609a552fb4b4354b [ 28.109284] Hardware name: Google Elm (DT) [ 28.109290] pstate: 40400005 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 28.109298] pc : __do_softirq+0xa0/0x388 [ 28.109309] lr : __do_softirq+0x70/0x388 [ 28.109316] sp : ffffffc008003ee0 [ 28.109321] x29: ffffffc008003f00 x28: 000000000000000a x27: 0000000000000080 [ 28.109334] x26: 0000000000000001 x25: ffffffefa7b350c0 x24: ffffffefa7b47480 [ 28.109346] x23: ffffffefa7b3d000 x22: 0000000000000000 x21: ffffffefa7b0fa40 [ 28.109358] x20: ffffffefa7b005b0 x19: ffffffefa7b47480 x18: 0000000000065b6b [ 28.109370] x17: ffffffefa749c8b0 x16: 000000000000018c x15: 00000000000001b8 [ 28.109382] x14: 00000000000d3b6b x13: 0000000000000006 x12: 0000000000057e91 [ 28.109394] x11: 0000000000000000 x10: 0000000000000000 x9 : ffffffefa7b47480 [ 28.109406] x8 : 00000000000000e0 x7 : 000000000f424000 x6 : 0000000000000000 [ 28.109418] x5 : ffffffefa7dfaca0 x4 : ffffffefa7dfadf0 x3 : 000000000000000f [ 28.109429] x2 : 0000000000000000 x1 : 0000000000000100 x0 : 0000000001ac65c5 [ 28.109441] Call trace: [ 28.109447] __do_softirq+0xa0/0x388 [ 28.109454] irq_exit+0xc0/0xe0 [ 28.109464] handle_domain_irq+0x68/0x90 [ 28.109473] gic_handle_irq+0xac/0xf0 [ 28.109480] call_on_irq_stack+0x28/0x50 [ 28.109488] do_interrupt_handler+0x44/0x58 [ 28.109496] el1_interrupt+0x30/0x58 [ 28.109506] el1h_64_irq_handler+0x18/0x24 [ 28.109512] el1h_64_irq+0x7c/0x80 [ 28.109519] arch_local_irq_enable+0xc/0x18 [ 28.109529] default_idle_call+0x40/0x140 [ 28.109539] do_idle+0x108/0x290 [ 28.109547] cpu_startup_entry+0x2c/0x30 [ 28.109554] rest_init+0xe8/0xf8 [ 28.109562] arch_call_rest_init+0x18/0x24 [ 28.109571] start_kernel+0x338/0x42c [ 28.109578] __primary_switched+0xbc/0xc4 [ 28.109588] Kernel panic - not syncing: softlockup: hung tasks Signed-off-by: Ricardo Ribalda Link: 
https://lore.kernel.org/r/20221122-mtk-pinctrl-v1-1-bedf5655a3d2@chromium.org Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mediatek/mtk-eint.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index 22736f60c16c..64a32d3ca481 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -278,12 +278,15 @@ static struct irq_chip mtk_eint_irq_chip = { static unsigned int mtk_eint_hw_init(struct mtk_eint *eint) { - void __iomem *reg = eint->base + eint->regs->dom_en; + void __iomem *dom_en = eint->base + eint->regs->dom_en; + void __iomem *mask_set = eint->base + eint->regs->mask_set; unsigned int i; for (i = 0; i < eint->hw->ap_num; i += 32) { - writel(0xffffffff, reg); - reg += 4; + writel(0xffffffff, dom_en); + writel(0xffffffff, mask_set); + dom_en += 4; + mask_set += 4; } return 0; From 683837f2f69d5ebd5e770d5096e3f65c237db4f9 Mon Sep 17 00:00:00 2001 From: Heiko Schocher Date: Wed, 23 Nov 2022 08:16:36 +0100 Subject: [PATCH 1304/1477] can: sja1000: fix size of OCR_MODE_MASK define [ Upstream commit 26e8f6a75248247982458e8237b98c9fb2ffcf9d ] bitfield mode in ocr register has only 2 bits not 3, so correct the OCR_MODE_MASK define. 
Signed-off-by: Heiko Schocher Link: https://lore.kernel.org/all/20221123071636.2407823-1-hs@denx.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- include/linux/can/platform/sja1000.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h index 5755ae5a4712..6a869682c120 100644 --- a/include/linux/can/platform/sja1000.h +++ b/include/linux/can/platform/sja1000.h @@ -14,7 +14,7 @@ #define OCR_MODE_TEST 0x01 #define OCR_MODE_NORMAL 0x02 #define OCR_MODE_CLOCK 0x03 -#define OCR_MODE_MASK 0x07 +#define OCR_MODE_MASK 0x03 #define OCR_TX0_INVERT 0x04 #define OCR_TX0_PULLDOWN 0x08 #define OCR_TX0_PULLUP 0x10 From 4db1d19b74e013ba26dae0e9e6025d670afc8759 Mon Sep 17 00:00:00 2001 From: Yasushi SHOJI Date: Fri, 25 Nov 2022 00:25:03 +0900 Subject: [PATCH 1305/1477] can: mcba_usb: Fix termination command argument [ Upstream commit 1a8e3bd25f1e789c8154e11ea24dc3ec5a4c1da0 ] Microchip USB Analyzer can activate the internal termination resistors by setting the "termination" option ON, or OFF to deactivate them. As I've observed, both with my oscilloscope and captured USB packets below, you must send "0" to turn it ON, and "1" to turn it OFF. From the schematics in the user's guide, I can confirm that you must drive the CAN_RES signal LOW "0" to activate the resistors. Reverse the argument value of usb_msg.termination to fix this. These are the two commands sequence, ON then OFF. > No.
Time Source Destination Protocol Length Info > 2 4.372547 host 1.3.1 USB 46 URB_BULK out > > Frame 2: 46 bytes on wire (368 bits), 46 bytes captured (368 bits) > USB URB > Leftover Capture Data: a80100000000000000000000000000000000a9 Signed-off-by: Yasushi SHOJI Link: https://lore.kernel.org/all/20221124152504.125994-1-yashi@spacecubics.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/usb/mcba_usb.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 21063335ab59..c07e327929ba 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -47,6 +47,10 @@ #define MCBA_VER_REQ_USB 1 #define MCBA_VER_REQ_CAN 2 +/* Drive the CAN_RES signal LOW "0" to activate R24 and R25 */ +#define MCBA_VER_TERMINATION_ON 0 +#define MCBA_VER_TERMINATION_OFF 1 + #define MCBA_SIDL_EXID_MASK 0x8 #define MCBA_DLC_MASK 0xf #define MCBA_DLC_RTR_MASK 0x40 @@ -469,7 +473,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv, priv->usb_ka_first_pass = false; } - if (msg->termination_state) + if (msg->termination_state == MCBA_VER_TERMINATION_ON) priv->can.termination = MCBA_TERMINATION_ENABLED; else priv->can.termination = MCBA_TERMINATION_DISABLED; @@ -789,9 +793,9 @@ static int mcba_set_termination(struct net_device *netdev, u16 term) }; if (term == MCBA_TERMINATION_ENABLED) - usb_msg.termination = 1; + usb_msg.termination = MCBA_VER_TERMINATION_ON; else - usb_msg.termination = 0; + usb_msg.termination = MCBA_VER_TERMINATION_OFF; mcba_usb_xmit_cmd(priv, (struct mcba_usb_msg *)&usb_msg); From 77ebf88e003140f10625d998b572ad1dde76d0c1 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 25 Nov 2022 16:23:48 +0000 Subject: [PATCH 1306/1477] ASoC: cs42l51: Correct PGA Volume minimum value [ Upstream commit 3d1bb6cc1a654c8693a85b1d262e610196edec8b ] The table in the datasheet actually shows the volume values in the wrong order, with 
the two -3dB values being reversed. This appears to have caused the lower of the two values to be used in the driver when the higher should have been, correct this mixup. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20221125162348.1288005-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/cs42l51.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index fc6a2bc311b4..c61b17dc2af8 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -146,7 +146,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = { 0, 0xA0, 96, adc_att_tlv), SOC_DOUBLE_R_SX_TLV("PGA Volume", CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL, - 0, 0x19, 30, pga_tlv), + 0, 0x1A, 30, pga_tlv), SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0), SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0), SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0), From 74b139c63f0775cf79266e9d9546c62b73fb3385 Mon Sep 17 00:00:00 2001 From: Lei Rao Date: Tue, 29 Nov 2022 17:48:11 +0800 Subject: [PATCH 1307/1477] nvme-pci: clear the prp2 field when not used [ Upstream commit a56ea6147facce4ac1fc38675455f9733d96232b ] If the prp2 field is not filled in nvme_setup_prp_simple(), the prp2 field is garbage data. According to nvme spec, the prp2 is reserved if the data transfer does not cross a memory page boundary, so clear it to zero if it is not used. 
Signed-off-by: Lei Rao Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 089f39103584..c222d7bf6ce1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -817,6 +817,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); if (bv->bv_len > first_prp_len) cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); + else + cmnd->dptr.prp2 = 0; return BLK_STS_OK; } From 54c15f67cb72a5ab856d15d3a887a4d8474e44be Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 25 Nov 2022 16:23:47 +0000 Subject: [PATCH 1308/1477] ASoC: ops: Correct bounds check for second channel on SX controls commit f33bcc506050f89433a52a3052054d4ebd37b1c1 upstream. Currently the check against the max value for the control is being applied after the value has had the minimum applied and been masked. But the max value simply indicates the number of volume levels on an SX control, and as such should just be applied on the raw value. 
Fixes: 97eea946b939 ("ASoC: ops: Check bounds for second channel in snd_soc_put_volsw_sx()") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20221125162348.1288005-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-ops.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index fe93458d864a..daecd386d5ec 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -445,14 +445,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, return err; if (snd_soc_volsw_is_stereo(mc)) { - val_mask = mask << rshift; - val2 = (ucontrol->value.integer.value[1] + min) & mask; + val2 = ucontrol->value.integer.value[1]; if (mc->platform_max && val2 > mc->platform_max) return -EINVAL; if (val2 > max) return -EINVAL; + val_mask = mask << rshift; + val2 = (val2 + min) & mask; val2 = val2 << rshift; err = snd_soc_component_update_bits(component, reg2, val_mask, From a2428a8dcb4f3eb80e7d38dba0bf71e4ff20cecd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 19 Dec 2022 12:27:32 +0100 Subject: [PATCH 1309/1477] Linux 5.10.160 Link: https://lore.kernel.org/r/20221215172906.638553794@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Rudi Heitbaum Tested-by: Allen Pais Tested-by: Linux Kernel Functional Testing Tested-by: Salvatore Bonaccorso Tested-by: Jon Hunter Tested-by: Sudip Mukherjee Tested-by: Florian Fainelli Tested-by: Guenter Roeck Tested-by: Hulk Robot Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bb9fab281555..6f7dae2f1a4e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 159 +SUBLEVEL = 160 EXTRAVERSION = NAME = Dare mighty things From 7d1d982f57843d0b494f903f90be7cb75b572261 Mon Sep 17 00:00:00 2001 From: Luca Stefani Date: 
Thu, 22 Dec 2022 14:10:49 +0100 Subject: [PATCH 1310/1477] UPSTREAM: pstore: Properly assign mem_type property If mem-type is specified in the device tree it would end up overriding the record_size field instead of populating mem_type. As record_size is currently parsed after the improper assignment with default size 0 it continued to work as expected regardless of the value found in the device tree. Simply changing the target field of the struct is enough to get mem-type working as expected. Fixes: 9d843e8fafc7 ("pstore: Add mem_type property DT parsing support") Cc: stable@vger.kernel.org Signed-off-by: Luca Stefani Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221222131049.286288-1-luca@osomprivacy.com Fixes: 1c4893edfe02 ("FROMGIT: pstore: Add mem_type property DT parsing support") Change-Id: Iea62cd7b26effc158500f097474d4df316285ac4 (cherry picked from commit beca3e311a49cd3c55a056096531737d7afa4361 ) Signed-off-by: Luca Stefani --- fs/pstore/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 7183fe4b5f72..df2950500334 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -679,7 +679,7 @@ static int ramoops_parse_dt(struct platform_device *pdev, field = value; \ } - parse_u32("mem-type", pdata->record_size, pdata->mem_type); + parse_u32("mem-type", pdata->mem_type, pdata->mem_type); parse_u32("record-size", pdata->record_size, 0); parse_u32("console-size", pdata->console_size, 0); parse_u32("ftrace-size", pdata->ftrace_size, 0); From 0761373d2f04c6a7a6c4ffe0d7eb12e9666ce239 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 29 Nov 2021 18:53:09 -0800 Subject: [PATCH 1311/1477] UPSTREAM: usb: dwc3: gadget: Skip checking Update Transfer status If we're not setting CMDACT (from "No Response" Update Transfer command), then there's no point in checking for the command status. So skip it. This can reduce a register read delay and improve performance. 
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/3dc31cf11581ae3ee82d9202dda3fc17d897d786.1638240306.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit bc27117c7cdd4b57d0f079620b483f3b229074bd) Bug: 263189538 Change-Id: Ia06408ffeeb561683662f00938bd9ab2f866eb89 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b222ffd8f8ef..89c8c4202aac 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -357,6 +357,12 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, cmd |= DWC3_DEPCMD_CMDACT; dwc3_writel(dep->regs, DWC3_DEPCMD, cmd); + + if (!(cmd & DWC3_DEPCMD_CMDACT)) { + ret = 0; + goto skip_status; + } + do { reg = dwc3_readl(dep->regs, DWC3_DEPCMD); if (!(reg & DWC3_DEPCMD_CMDACT)) { @@ -398,6 +404,7 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, cmd_status = -ETIMEDOUT; } +skip_status: trace_dwc3_gadget_ep_cmd(dep, cmd, params, cmd_status); if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) { From 15b61c98d6dc9fae6f63d68d49b0e97c45edc3f6 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 29 Nov 2021 18:53:16 -0800 Subject: [PATCH 1312/1477] UPSTREAM: usb: dwc3: gadget: Ignore Update Transfer cmd params The controller doesn't check for Update Transfer command parameters DEPCMDPAR{0,1,2}. Writing to these registers is unnecessary. Ignoring this improves performance slightly by removing the register write delay. 
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/997d9ebf38c6bba920d4ee77bd8c77bf81978a55.1638240306.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a02a26eb0aeaa2859a8b2f182bd5b5f7228ee1fe) Bug: 263189538 Change-Id: Ie30380e71dfd7febb71eb0c3c725f336f4d6da9a Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 89c8c4202aac..b3e096744d48 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -331,9 +331,17 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, } } - dwc3_writel(dep->regs, DWC3_DEPCMDPAR0, params->param0); - dwc3_writel(dep->regs, DWC3_DEPCMDPAR1, params->param1); - dwc3_writel(dep->regs, DWC3_DEPCMDPAR2, params->param2); + /* + * For some commands such as Update Transfer command, DEPCMDPARn + * registers are reserved. Since the driver often sends Update Transfer + * command, don't write to DEPCMDPARn to avoid register write delays and + * improve performance. + */ + if (DWC3_DEPCMD_CMD(cmd) != DWC3_DEPCMD_UPDATETRANSFER) { + dwc3_writel(dep->regs, DWC3_DEPCMDPAR0, params->param0); + dwc3_writel(dep->regs, DWC3_DEPCMDPAR1, params->param1); + dwc3_writel(dep->regs, DWC3_DEPCMDPAR2, params->param2); + } /* * Synopsys Databook 2.60a states in section 6.3.2.5.6 of that if we're From d099b9a70e5382c5bbf131d71c0383cca0bc8d73 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 29 Nov 2021 18:53:22 -0800 Subject: [PATCH 1313/1477] UPSTREAM: usb: dwc3: gadget: Skip reading GEVNTSIZn The driver knows what it needs to set for GEVNTSIZn, and the controller doesn't modify this register unless there's a hard reset. To save a few microseconds of register read in read-modify-write operation, simply do register write with the expected values. 
This can improve performance when there are many interrupts generated, which the driver needs to check and handle. Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/efddf4ee5821c4bc5ae7ad90d629ec7a0ebcbf9a.1638240306.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 76c4c95dac0da03f64d8b59a8834faf8020af8dc) Bug: 263189538 Change-Id: I7dec7b568f12b1529ab1e52a600056be98910f02 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b3e096744d48..4de2f1d3d35f 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4201,7 +4201,6 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) struct dwc3 *dwc = evt->dwc; irqreturn_t ret = IRQ_NONE; int left; - u32 reg; left = evt->count; @@ -4232,9 +4231,8 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) ret = IRQ_HANDLED; /* Unmask interrupt */ - reg = dwc3_readl(dwc->regs, DWC3_GEVNTSIZ(0)); - reg &= ~DWC3_GEVNTSIZ_INTMASK; - dwc3_writel(dwc->regs, DWC3_GEVNTSIZ(0), reg); + dwc3_writel(dwc->regs, DWC3_GEVNTSIZ(0), + DWC3_GEVNTSIZ_SIZE(evt->length)); if (dwc->imod_interval) { dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), DWC3_GEVNTCOUNT_EHB); @@ -4268,7 +4266,6 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) struct dwc3 *dwc = evt->dwc; u32 amount; u32 count; - u32 reg; if (pm_runtime_suspended(dwc->dev)) { pm_runtime_get(dwc->dev); @@ -4295,9 +4292,8 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) evt->flags |= DWC3_EVENT_PENDING; /* Mask interrupt */ - reg = dwc3_readl(dwc->regs, DWC3_GEVNTSIZ(0)); - reg |= DWC3_GEVNTSIZ_INTMASK; - dwc3_writel(dwc->regs, DWC3_GEVNTSIZ(0), reg); + dwc3_writel(dwc->regs, DWC3_GEVNTSIZ(0), + DWC3_GEVNTSIZ_INTMASK | DWC3_GEVNTSIZ_SIZE(evt->length)); amount = min(count, evt->length - 
evt->lpos); memcpy(evt->cache + evt->lpos, evt->buf + evt->lpos, amount); From 33b5613a43848bc4fb0d401ba22ce2789136c69a Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Sun, 6 Mar 2022 22:12:50 +0100 Subject: [PATCH 1314/1477] UPSTREAM: usb: dwc3: gadget: ep_queue simplify isoc start condition To improve reading the code this patch moves the cases to start_isoc or return the function under one common condition check. Reviewed-by: Thinh Nguyen Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20220306211251.2281335-2-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 26d27a1080a7d0e8f0c1a56dc50aae336f9525d1) Bug: 263189538 Change-Id: I60ada6edcc6f1a505f2a3d86fc4a6d8b7d22f089 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 4de2f1d3d35f..f03646b827f1 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1900,13 +1900,11 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) * errors which will force us issue EndTransfer command. */ if (usb_endpoint_xfer_isoc(dep->endpoint.desc)) { - if (!(dep->flags & DWC3_EP_PENDING_REQUEST) && - !(dep->flags & DWC3_EP_TRANSFER_STARTED)) - return 0; - - if ((dep->flags & DWC3_EP_PENDING_REQUEST)) { - if (!(dep->flags & DWC3_EP_TRANSFER_STARTED)) + if (!(dep->flags & DWC3_EP_TRANSFER_STARTED)) { + if ((dep->flags & DWC3_EP_PENDING_REQUEST)) return __dwc3_gadget_start_isoc(dep); + + return 0; } } From 802b4745f707458ddc7c0b3cbc0eef1da83d9e2f Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Wed, 21 Dec 2022 21:37:56 +0530 Subject: [PATCH 1315/1477] UPSTREAM: usb: dwc3: gadget: move cmd_endtransfer to extra function This patch adds the extra function __dwc3_stop_active_transfer to consolidate the same codepath. 
Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20220306211251.2281335-3-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman (cherry picked from commit e192cc7b52399d1b073f88cd3ba128b74d3a57f1) Bug: 263189538 Change-Id: Icbc4c63e380e2a88973e739b5513528cc5dbde66 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 70 +++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f03646b827f1..9bfcfc7ccc38 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1653,6 +1653,40 @@ static int __dwc3_gadget_get_frame(struct dwc3 *dwc) return DWC3_DSTS_SOFFN(reg); } +/** + * __dwc3_stop_active_transfer - stop the current active transfer + * @dep: isoc endpoint + * @force: set forcerm bit in the command + * @interrupt: command complete interrupt after End Transfer command + * + * When setting force, the ForceRM bit will be set. In that case + * the controller won't update the TRB progress on command + * completion. It also won't clear the HWO bit in the TRB. + * The command will also not complete immediately in that case. + */ +static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt) +{ + struct dwc3_gadget_ep_cmd_params params; + u32 cmd; + int ret; + + cmd = DWC3_DEPCMD_ENDTRANSFER; + cmd |= force ? DWC3_DEPCMD_HIPRI_FORCERM : 0; + cmd |= interrupt ? 
DWC3_DEPCMD_CMDIOC : 0; + cmd |= DWC3_DEPCMD_PARAM(dep->resource_index); + memset(&params, 0, sizeof(params)); + ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params); + WARN_ON_ONCE(ret); + dep->resource_index = 0; + + if (!interrupt) + dep->flags &= ~DWC3_EP_TRANSFER_STARTED; + else if (!ret) + dep->flags |= DWC3_EP_END_TRANSFER_PENDING; + + return ret; +} + /** * dwc3_gadget_start_isoc_quirk - workaround invalid frame number * @dep: isoc endpoint @@ -1828,21 +1862,8 @@ static int __dwc3_gadget_start_isoc(struct dwc3_ep *dep) * status, issue END_TRANSFER command and retry on the next XferNotReady * event. */ - if (ret == -EAGAIN) { - struct dwc3_gadget_ep_cmd_params params; - u32 cmd; - - cmd = DWC3_DEPCMD_ENDTRANSFER | - DWC3_DEPCMD_CMDIOC | - DWC3_DEPCMD_PARAM(dep->resource_index); - - dep->resource_index = 0; - memset(&params, 0, sizeof(params)); - - ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params); - if (!ret) - dep->flags |= DWC3_EP_END_TRANSFER_PENDING; - } + if (ret == -EAGAIN) + ret = __dwc3_stop_active_transfer(dep, false, true); return ret; } @@ -3648,10 +3669,6 @@ static void dwc3_reset_gadget(struct dwc3 *dwc) void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt) { - struct dwc3_gadget_ep_cmd_params params; - u32 cmd; - int ret; - if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || (dep->flags & DWC3_EP_DELAY_STOP) || (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) @@ -3683,20 +3700,7 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, * * This mode is NOT available on the DWC_usb31 IP. */ - - cmd = DWC3_DEPCMD_ENDTRANSFER; - cmd |= force ? DWC3_DEPCMD_HIPRI_FORCERM : 0; - cmd |= interrupt ?
DWC3_DEPCMD_CMDIOC : 0; - cmd |= DWC3_DEPCMD_PARAM(dep->resource_index); - memset(&params, 0, sizeof(params)); - ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params); - WARN_ON_ONCE(ret); - dep->resource_index = 0; - - if (!interrupt) - dep->flags &= ~DWC3_EP_TRANSFER_STARTED; - else - dep->flags |= DWC3_EP_END_TRANSFER_PENDING; + __dwc3_stop_active_transfer(dep, force, interrupt); } EXPORT_SYMBOL_GPL(dwc3_stop_active_transfer); From a79e848e5299ab2a3c86f4e83e2e3946e5208639 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Wed, 21 Dec 2022 21:51:36 +0530 Subject: [PATCH 1316/1477] UPSTREAM: usb: dwc3: ep0: Don't prepare beyond Setup stage Since we can't guarantee that the host won't send new Setup packet before going through the device-initiated disconnect, don't prepare beyond the Setup stage and keep the device in EP0_SETUP_PHASE. This ensures that the device-initiated disconnect sequence can go through gracefully. Note that the controller won't service the End Transfer command if it can't DMA out the Setup packet.
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/6bacec56ecabb2c6e49a09cedfcac281fdc97de0.1650593829.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c96683798e272366866a5c0ce3073c0b5a256db7) Bug: 263189538 Change-Id: Ice2d6837acebd312e732527504f4013f33a8fe6d Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/ep0.c | 2 +- drivers/usb/dwc3/gadget.c | 30 +++++++++++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 9b6ebc3c902d..5af4d3794e88 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -813,7 +813,7 @@ static void dwc3_ep0_inspect_setup(struct dwc3 *dwc, int ret = -EINVAL; u32 len; - if (!dwc->gadget_driver) + if (!dwc->gadget_driver || !dwc->connected) goto out; trace_dwc3_ctrl_req(ctrl); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 9bfcfc7ccc38..c6398dfd1627 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2493,6 +2493,23 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc) spin_lock_irqsave(&dwc->lock, flags); dwc->connected = false; + /* + * Per databook, when we want to stop the gadget, if a control transfer + * is still in process, complete it and get the core into setup phase. + */ + if (dwc->ep0state != EP0_SETUP_PHASE) { + int ret; + + reinit_completion(&dwc->ep0_in_setup); + + spin_unlock_irqrestore(&dwc->lock, flags); + ret = wait_for_completion_timeout(&dwc->ep0_in_setup, + msecs_to_jiffies(DWC3_PULL_UP_TIMEOUT)); + spin_lock_irqsave(&dwc->lock, flags); + if (ret == 0) + dev_warn(dwc->dev, "timed out waiting for SETUP phase\n"); + } + /* * In the Synopsys DesignWare Cores USB3 Databook Rev.
3.30a * Section 4.1.8 Table 4-7, it states that for a device-initiated @@ -2524,19 +2541,6 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) vdwc->softconnect = is_on; - /* - * Per databook, when we want to stop the gadget, if a control transfer - * is still in process, complete it and get the core into setup phase. - */ - if (!is_on && dwc->ep0state != EP0_SETUP_PHASE) { - reinit_completion(&dwc->ep0_in_setup); - - ret = wait_for_completion_timeout(&dwc->ep0_in_setup, - msecs_to_jiffies(DWC3_PULL_UP_TIMEOUT)); - if (ret == 0) - dev_warn(dwc->dev, "timed out waiting for SETUP phase\n"); - } - /* * Avoid issuing a runtime resume if the device is already in the * suspended state during gadget disconnect. DWC3 gadget was already From 4678fabb6e69f56d1616c4e3b77aedbf87efd641 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 21 Apr 2022 19:22:57 -0700 Subject: [PATCH 1317/1477] UPSTREAM: usb: dwc3: gadget: Only End Transfer for ep0 data phase The driver shouldn't be able to issue End Transfer to the control endpoint at anytime. Typically we should only do so in error cases such as invalid/unexpected direction of Data Phase as described in the control transfer flow of the programming guide. It _may_ end started data phase during controller deinitialization from soft disconnect or driver removal. However, that should not happen because the driver should be maintained in EP0_SETUP_PHASE during driver tear-down. On soft-connect, the controller should be reset from a soft-reset and there should be no issue starting the control endpoint. 
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/3c6643678863a26702e4115e9e19d7d94a30d49c.1650593829.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit ace17b6ee4f92ab0375d12a1b42494f8590a96b6) Bug: 263189538 Change-Id: Ifd58ee4b1f213db72d6bcc7137c96ee2a399e21c Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c6398dfd1627..22840e1373b9 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3673,6 +3673,17 @@ static void dwc3_reset_gadget(struct dwc3 *dwc) void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt) { + struct dwc3 *dwc = dep->dwc; + + /* + * Only issue End Transfer command to the control endpoint of a started + * Data Phase. Typically we should only do so in error cases such as + * invalid/unexpected direction as described in the control transfer + * flow of the programming guide. + */ + if (dep->number <= 1 && dwc->ep0state != EP0_DATA_PHASE) + return; + if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || (dep->flags & DWC3_EP_DELAY_STOP) || (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) From 7b90f7b05e1991454bd7c5aac259d0d12807e71c Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 21 Apr 2022 19:23:03 -0700 Subject: [PATCH 1318/1477] UPSTREAM: usb: dwc3: gadget: Delay issuing End Transfer If the controller hasn't DMA'ed the Setup data from its fifo, it won't process the End Transfer command. Polling for the command completion may block the driver from servicing the Setup phase and cause a timeout. Previously we only check and delay issuing End Transfer in the case of endpoint dequeue. Let's do that for all End Transfer scenarios. 
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/2fcf3b5d90068d549589a57a27a79f76c6769b04.1650593829.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f66eef8fb8989a7193cafc3870f7c7b2b97f16cb) Bug: 263189538 Change-Id: I58dc40ff7defdd03f52da900ee0b179ffe55d2cc Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 22840e1373b9..83b4d3cd0f0c 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2045,16 +2045,6 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, if (r == req) { struct dwc3_request *t; - /* - * If a Setup packet is received but yet to DMA out, the controller will - * not process the End Transfer command of any endpoint. Polling of its - * DEPCMD.CmdAct may block setting up TRB for Setup packet, causing a - * timeout. Delay issuing the End Transfer command until the Setup TRB is - * prepared. - */ - if (dwc->ep0state != EP0_SETUP_PHASE && !dwc->delayed_status) - dep->flags |= DWC3_EP_DELAY_STOP; - /* wait until it is processed */ dwc3_stop_active_transfer(dep, true, true); @@ -3689,6 +3679,18 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) return; + /* + * If a Setup packet is received but yet to DMA out, the controller will + * not process the End Transfer command of any endpoint. Polling of its + * DEPCMD.CmdAct may block setting up TRB for Setup packet, causing a + * timeout. Delay issuing the End Transfer command until the Setup TRB is + * prepared. + */ + if (dwc->ep0state != EP0_SETUP_PHASE && !dwc->delayed_status) { + dep->flags |= DWC3_EP_DELAY_STOP; + return; + } + /* * NOTICE: We are violating what the Databook says about the * EndTransfer command. 
Ideally we would _always_ wait for the From 767a360826692da89ef2335ae62b81ee4a007416 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Mon, 23 May 2022 14:39:48 -0700 Subject: [PATCH 1319/1477] UPSTREAM: usb: dwc3: gadget: Fix IN endpoint max packet size allocation The current logic to assign the max packet limit for IN endpoints attempts to take the default HW value and apply the optimal endpoint settings based on it. However, if the default value reports a TxFIFO size large enough for only one max packet, it will divide the value and assign a smaller ep max packet limit. For example, if the default TxFIFO size fits 1024B, current logic will assign 1024/3 = 341B to ep max packet size. If function drivers attempt to request for an endpoint with a wMaxPacketSize of 1024B (SS BULK max packet size) then it will fail, as the gadget is unable to find an endpoint which can fit the requested size. Functionally, if the TxFIFO has enough space to fit one max packet, it will be sufficient, at least when initializing the endpoints. 
Fixes: d94ea5319813 ("usb: dwc3: gadget: Properly set maxpacket limit") Cc: stable Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220523213948.22142-1-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9c1e916960c1192e746bf615e4dae25423473a64) Bug: 263189538 Change-Id: I56995b3d5bbfa8dc61a0e4084d6109d90951244d Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 83b4d3cd0f0c..29ed10733254 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2965,6 +2965,7 @@ static int dwc3_gadget_init_in_endpoint(struct dwc3_ep *dep) struct dwc3 *dwc = dep->dwc; u32 mdwidth; int size; + int maxpacket; mdwidth = dwc3_mdwidth(dwc); @@ -2977,21 +2978,24 @@ static int dwc3_gadget_init_in_endpoint(struct dwc3_ep *dep) else size = DWC31_GTXFIFOSIZ_TXFDEP(size); - /* FIFO Depth is in MDWDITH bytes. Multiply */ - size *= mdwidth; - /* - * To meet performance requirement, a minimum TxFIFO size of 3x - * MaxPacketSize is recommended for endpoints that support burst and a - * minimum TxFIFO size of 2x MaxPacketSize for endpoints that don't - * support burst. Use those numbers and we can calculate the max packet - * limit as below. 
+ * maxpacket size is determined as part of the following, after assuming + * a mult value of one maxpacket: + * DWC3 revision 280A and prior: + * fifo_size = mult * (max_packet / mdwidth) + 1; + * maxpacket = mdwidth * (fifo_size - 1); + * + * DWC3 revision 290A and onwards: + * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1 + * maxpacket = mdwidth * ((fifo_size - 1) - 1) - mdwidth; */ - if (dwc->maximum_speed >= USB_SPEED_SUPER) - size /= 3; + if (DWC3_VER_IS_PRIOR(DWC3, 290A)) + maxpacket = mdwidth * (size - 1); else - size /= 2; + maxpacket = mdwidth * ((size - 1) - 1) - mdwidth; + /* Functionally, space for one max packet is sufficient */ + size = min_t(int, maxpacket, 1024); usb_ep_set_maxpacket_limit(&dep->endpoint, size); dep->endpoint.max_streams = 16; From 75a4f0b5e1f4cb19b69472d465edc5f3a7c5ea73 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Mon, 11 Jul 2022 18:44:03 -0700 Subject: [PATCH 1320/1477] UPSTREAM: usb: dwc3: ep0: Properly handle setup_packet_pending scenario in data stage During a 3 stage SETUP transfer, if the host sends another SETUP token before completing the status phase, it signifies that the host has aborted the current control transfer. Currently, if a setup_packet_pending is received, there are no subsequent calls to dwc3_ep0_out_start() to fetch the new SETUP packet. This leads to a stall on EP0, as host does not expect another STATUS phase as it has aborted the current transfer. Fix this issue by explicitly stalling and restarting EP0, as well as resetting the trb_enqueue indexes. 
(without this, there is a chance the SETUP TRB is set up on trb_enqueue == 1) Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220712014403.2977-1-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 5e76ee96be8f7bbf9416a5edddc8c064e7e7c6ac) Bug: 263189538 Change-Id: Ia0cc555ca8cd94048308406d975b3b4f4aa6ca54 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/ep0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 5af4d3794e88..7f01513df0f2 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -239,6 +239,8 @@ void dwc3_ep0_stall_and_restart(struct dwc3 *dwc) dwc3_gadget_giveback(dep, req, -ECONNRESET); } + dwc->eps[0]->trb_enqueue = 0; + dwc->eps[1]->trb_enqueue = 0; dwc->ep0state = EP0_SETUP_PHASE; dwc3_ep0_out_start(dwc); } @@ -1139,6 +1141,11 @@ static void dwc3_ep0_xfernotready(struct dwc3 *dwc, if (dwc->ep0_next_event != DWC3_EP0_NRDY_STATUS) return; + if (dwc->setup_packet_pending) { + dwc3_ep0_stall_and_restart(dwc); + return; + } + dwc->ep0state = EP0_STATUS_PHASE; if (dwc->delayed_status) { From d53fb78733b6686fde47ef96a064d6b8b8a77bcd Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 20 Jul 2022 23:35:23 +0200 Subject: [PATCH 1321/1477] UPSTREAM: usb: dwc3: gadget: conditionally remove requests The functions stop_active_transfers and ep_disable are both calling remove_requests. This function in both cases will give back the requests with status ESHUTDOWN, which also represents a physical disconnection. For ep_disable this is not true. This patch adds the status parameter to remove_requests and sets the status to ECONNRESET on ep_disable.
Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20220720213523.1055897-1-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b44c0e7fef51ee7e8ca8c6efbf706f5613787100) Bug: 263189538 Change-Id: I1bd7a42e6f2f99a0ce021ef3c94dc630ae9260df Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 29ed10733254..8611f3c59677 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -966,7 +966,7 @@ out: return 0; } -static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep) +static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep, int status) { struct dwc3_request *req; @@ -976,19 +976,19 @@ static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep) while (!list_empty(&dep->started_list)) { req = next_request(&dep->started_list); - dwc3_gadget_giveback(dep, req, -ESHUTDOWN); + dwc3_gadget_giveback(dep, req, status); } while (!list_empty(&dep->pending_list)) { req = next_request(&dep->pending_list); - dwc3_gadget_giveback(dep, req, -ESHUTDOWN); + dwc3_gadget_giveback(dep, req, status); } while (!list_empty(&dep->cancelled_list)) { req = next_request(&dep->cancelled_list); - dwc3_gadget_giveback(dep, req, -ESHUTDOWN); + dwc3_gadget_giveback(dep, req, status); } } @@ -1023,7 +1023,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) dep->endpoint.desc = NULL; } - dwc3_remove_requests(dwc, dep); + dwc3_remove_requests(dwc, dep, -ECONNRESET); dep->stream_capable = false; dep->type = 0; @@ -2326,7 +2326,7 @@ static void dwc3_stop_active_transfers(struct dwc3 *dwc) if (!dep) continue; - dwc3_remove_requests(dwc, dep); + dwc3_remove_requests(dwc, dep, -ESHUTDOWN); } } From b44330f50ced30c4bdbfbd5d0bf42ad13948987b Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Thu, 15 Dec 2022 10:59:05 +0530 Subject: [PATCH 1322/1477] 
UPSTREAM: usb: gadget: f_fs: Prevent race during ffs_ep0_queue_wait While performing fast composition switch, there is a possibility that the process of ffs_ep0_write/ffs_ep0_read get into a race condition due to ep0req being freed up from functionfs_unbind. Consider the scenario that the ffs_ep0_write calls the ffs_ep0_queue_wait by taking a lock &ffs->ev.waitq.lock. However, the functionfs_unbind isn't bounded so it can go ahead and mark the ep0req to NULL, and since there is no NULL check in ffs_ep0_queue_wait we will end up in use-after-free. Fix this by making a serialized execution between the two functions using a mutex_lock(ffs->mutex). Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Signed-off-by: Udipto Goswami Tested-by: Krishna Kurapati Link: https://lore.kernel.org/r/20221215052906.8993-2-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman Bug: 265837309 (cherry picked from commit 6a19da111057f69214b97c62fb0ac59023970850 https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git/ usb-linus) Change-Id: I1d71b96709a557caee72d1c84e3ee43969a89c13 Signed-off-by: Prashanth K --- drivers/usb/gadget/function/f_fs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index ecf298bc49ed..42a37a803849 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -279,6 +279,9 @@ static int __ffs_ep0_queue_wait(struct ffs_data *ffs, char *data, size_t len) struct usb_request *req = ffs->ep0req; int ret; + if (!req) + return -EINVAL; + req->zero = len < le16_to_cpu(ffs->ev.setup.wLength); spin_unlock_irq(&ffs->ev.waitq.lock); @@ -1891,10 +1894,12 @@ static void functionfs_unbind(struct ffs_data *ffs) ENTER(); if (!WARN_ON(!ffs->gadget)) { + mutex_lock(&ffs->mutex); usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req); ffs->ep0req = NULL; ffs->gadget = NULL; clear_bit(FFS_FL_BOUND, &ffs->flags); + mutex_unlock(&ffs->mutex); ffs_data_put(ffs); } 
} From 56583c9472979ca4388313085f921f79047b51d8 Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Thu, 15 Dec 2022 10:59:06 +0530 Subject: [PATCH 1323/1477] UPSTREAM: usb: gadget: f_fs: Ensure ep0req is dequeued before free_request As per the documentation, function usb_ep_free_request guarantees the request will not be queued or no longer be re-queued (or otherwise used). However, with the current implementation it doesn't make sure that the request in ep0 isn't reused. Fix this by dequeuing the ep0req on functionfs_unbind before freeing the request to align with the definition. Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Signed-off-by: Udipto Goswami Tested-by: Krishna Kurapati Link: https://lore.kernel.org/r/20221215052906.8993-3-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman Bug: 265837309 (cherry picked from commit ce405d561b020e5a46340eb5146805a625dcacee https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git/ usb-linus) Change-Id: Idbc5f13cedac59cbc893d229383b01662f4e4a03 Signed-off-by: Prashanth K --- drivers/usb/gadget/function/f_fs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 42a37a803849..f49f8706fbd3 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1894,6 +1894,8 @@ static void functionfs_unbind(struct ffs_data *ffs) ENTER(); if (!WARN_ON(!ffs->gadget)) { + /* dequeue before freeing ep0req */ + usb_ep_dequeue(ffs->gadget->ep0, ffs->ep0req); mutex_lock(&ffs->mutex); usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req); ffs->ep0req = NULL; From 3519fc246df0ea4f3f53053b41213591d5490313 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 18 Feb 2022 17:29:44 -0800 Subject: [PATCH 1324/1477] BACKPORT: kasan: split kasan_*enabled() functions into a separate header In an upcoming commit we are going to need to call kasan_hw_tags_enabled() from arch/arm64/include/asm/mte.h. 
This would create a circular dependency between headers if KASAN_GENERIC or KASAN_SW_TAGS is enabled: linux/kasan.h -> linux/pgtable.h -> asm/pgtable.h -> asm/mte.h -> linux/kasan.h. Break the cycle by introducing a new header linux/kasan-enabled.h with the kasan_*enabled() functions that can be included from asm/mte.h. Link: https://linux-review.googlesource.com/id/I5b0d96c6ed0026fc790899e14d42b2fac6ab568e Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20220219012945.894950-1-pcc@google.com Signed-off-by: Will Deacon Change-Id: I984365534f5ed76ed0ff79fce62b09afbbc92982 Bug: 265364138 (cherry picked from commit f9b5e46f4097eb298f68e5b02f70697a90a44739) [Zhenhua: Resolved minor conflicts in include/linux/kasan.h] Change-Id: I631ae7d32ed560dd22917a4691c498e234b4f51d Signed-off-by: Jaewon Kim Signed-off-by: Zhenhua Huang --- include/linux/kasan-enabled.h | 33 +++++++++++++++++++++++++++++++++ include/linux/kasan.h | 24 ++---------------------- 2 files changed, 35 insertions(+), 22 deletions(-) create mode 100644 include/linux/kasan-enabled.h diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h new file mode 100644 index 000000000000..4b6615375022 --- /dev/null +++ b/include/linux/kasan-enabled.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KASAN_ENABLED_H +#define _LINUX_KASAN_ENABLED_H + +#ifdef CONFIG_KASAN_HW_TAGS + +DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); + +static __always_inline bool kasan_enabled(void) +{ + return static_branch_likely(&kasan_flag_enabled); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return kasan_enabled(); +} + +#else /* CONFIG_KASAN_HW_TAGS */ + +static inline bool kasan_enabled(void) +{ + return IS_ENABLED(CONFIG_KASAN); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return false; +} + +#endif /* CONFIG_KASAN_HW_TAGS */ + +#endif /* LINUX_KASAN_ENABLED_H */ diff --git a/include/linux/kasan.h 
b/include/linux/kasan.h index b59675cc19b7..99dc14b5ad44 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -3,6 +3,8 @@ #define _LINUX_KASAN_H #include +#include +#include #include #include @@ -82,33 +84,11 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN_HW_TAGS -DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); - -static __always_inline bool kasan_enabled(void) -{ - return static_branch_likely(&kasan_flag_enabled); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return kasan_enabled(); -} - void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); void kasan_free_pages(struct page *page, unsigned int order); #else /* CONFIG_KASAN_HW_TAGS */ -static inline bool kasan_enabled(void) -{ - return IS_ENABLED(CONFIG_KASAN); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return false; -} - static __always_inline void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) { From ce6cc743caf2cbfbc3f9747cfe2de84f0f527466 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 1 Mar 2022 15:45:18 +0000 Subject: [PATCH 1325/1477] UPSTREAM: kasan: fix a missing header include of static_keys.h The kasan-enabled.h header relies on static keys, so make sure to include the header to avoid compilation errors (with JUMP_LABEL=n). 
It fixes the following: ./include/linux/kasan-enabled.h:9:1: warning: data definition has no type or storage class 9 | DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); | ^~~~~~~~~~~~~~~~~~~~~~~~ error: type defaults to 'int' in declaration of 'DECLARE_STATIC_KEY_FALSE' [-Werror=implicit-int] Fixes: f9b5e46f4097eb29 ("kasan: split kasan_*enabled() functions into a separate header") Cc: Peter Collingbourne Cc: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Acked-by: Andrey Konovalov Signed-off-by: Joey Gouly Link: https://lore.kernel.org/r/20220301154518.19456-1-joey.gouly@arm.com Signed-off-by: Will Deacon Bug: 265364138 (cherry picked from commit d8fd5a1e78db375f2246d43df7833fec07a221cd) Change-Id: Id33a67919113839503630b7364af1bdea3cfcedf Signed-off-by: Jaewon Kim Signed-off-by: Zhenhua Huang --- include/linux/kasan-enabled.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h index 4b6615375022..6f612d69ea0c 100644 --- a/include/linux/kasan-enabled.h +++ b/include/linux/kasan-enabled.h @@ -2,6 +2,8 @@ #ifndef _LINUX_KASAN_ENABLED_H #define _LINUX_KASAN_ENABLED_H +#include <linux/static_key.h> + #ifdef CONFIG_KASAN_HW_TAGS DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); From b10e8ec405d8fd60fd4cc80b9108a758b196a0c2 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 9 May 2022 18:20:53 -0700 Subject: [PATCH 1326/1477] BACKPORT: printk: stop including cache.h from printk.h An inclusion of cache.h in printk.h was added in 2014 in commit c28aa1f0a847 ("printk/cache: mark printk_once test variable __read_mostly") in order to bring in the definition of __read_mostly. The usage of __read_mostly was later removed in commit 3ec25826ae33 ("printk: Tie printk_once / printk_deferred_once into .data.once for reset") which made the inclusion of cache.h unnecessary, so remove it. We have a small amount of code that depended on the inclusion of cache.h from printk.h; fix that code to include the appropriate header.
This fixes a circular inclusion on arm64 (linux/printk.h -> linux/cache.h -> asm/cache.h -> linux/kasan-enabled.h -> linux/static_key.h -> linux/jump_label.h -> linux/bug.h -> asm/bug.h -> linux/printk.h) that would otherwise be introduced by the next patch. Build tested using {allyesconfig,defconfig} x {arm64,x86_64}. Link: https://linux-review.googlesource.com/id/I8fd51f72c9ef1f2d6afd3b2cbc875aa4792c1fba Link: https://lkml.kernel.org/r/20220427195820.1716975-1-pcc@google.com Signed-off-by: Peter Collingbourne Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: David Rientjes Cc: Dmitry Vyukov Cc: Eric W. Biederman Cc: Herbert Xu Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Kees Cook Cc: Pekka Enberg Cc: Roman Gushchin Cc: Vlastimil Babka Signed-off-by: Andrew Morton Bug: 265364138 (cherry picked from commit 534aa1dc975ac883ad89110534585a96630802a0) [Zhenhua: Resolved minor compile issue in include/linux/debug_locks.h] Change-Id: I46182e781b64561a1ebd5405628a317d4f6cb789 Signed-off-by: Jaewon Kim Signed-off-by: Zhenhua Huang --- arch/arm64/include/asm/mte-kasan.h | 1 + arch/arm64/include/asm/percpu.h | 1 + arch/csky/include/asm/processor.h | 2 +- include/linux/debug_locks.h | 1 + include/linux/jump_label.h | 2 ++ include/linux/printk.h | 1 - kernel/bpf/bpf_lru_list.h | 1 + 7 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 30ef96cc168e..6537d2deb728 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -6,6 +6,7 @@ #define __ASM_MTE_KASAN_H #include +#include #include #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 8f1661603b78..b9ba19dbdb69 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -10,6 +10,7 @@ #include #include #include +#include static inline void set_my_cpu_offset(unsigned long off) 
{ diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h index 4800f6563abb..baf9388d5952 100644 --- a/arch/csky/include/asm/processor.h +++ b/arch/csky/include/asm/processor.h @@ -6,9 +6,9 @@ #include #include +#include #include #include -#include #include #include #include diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index edb5c186b0b7..ad06852cbd5d 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -5,6 +5,7 @@ #include #include #include +#include struct task_struct; diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index e203fdb020c4..a1b067f0c6ad 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -190,6 +190,8 @@ struct module; #ifdef BUILD_FIPS140_KO +#include + static inline int static_key_count(struct static_key *key) { return atomic_read(&key->enabled); diff --git a/include/linux/printk.h b/include/linux/printk.h index f589b8b60806..14d13ecaa8f8 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -6,7 +6,6 @@ #include #include #include -#include #include extern const char linux_banner[]; diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index 6b12f06ee18c..4ea227c9c1ad 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -4,6 +4,7 @@ #ifndef __BPF_LRU_LIST_H_ #define __BPF_LRU_LIST_H_ +#include #include #include From ca53b8f1b44ac5a5a9eec34e4c88c8c2cba0d49d Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 9 May 2022 18:20:53 -0700 Subject: [PATCH 1327/1477] BACKPORT: mm: make minimum slab alignment a runtime property When CONFIG_KASAN_HW_TAGS is enabled we currently increase the minimum slab alignment to 16. This happens even if MTE is not supported in hardware or disabled via kasan=off, which creates an unnecessary memory overhead in those cases. 
Eliminate this overhead by making the minimum slab alignment a runtime property and only aligning to 16 if KASAN is enabled at runtime. On a DragonBoard 845c (non-MTE hardware) with a kernel built with CONFIG_KASAN_HW_TAGS, waiting for quiescence after a full Android boot I see the following Slab measurements in /proc/meminfo (median of 3 reboots): Before: 169020 kB After: 167304 kB [akpm@linux-foundation.org: make slab alignment type `unsigned int' to avoid casting] Link: https://linux-review.googlesource.com/id/I752e725179b43b144153f4b6f584ceb646473ead Link: https://lkml.kernel.org/r/20220427195820.1716975-2-pcc@google.com Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: David Rientjes Reviewed-by: Catalin Marinas Acked-by: Vlastimil Babka Cc: Pekka Enberg Cc: Roman Gushchin Cc: Joonsoo Kim Cc: Herbert Xu Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Eric W. 
Biederman Cc: Kees Cook Signed-off-by: Andrew Morton Bug: 265364138 (cherry picked from commit d949a8155d139aa890795b802004a196b7f00598) [Zhenhua: fold 587cfd8e66df3515 ("ANDROID: fix alignment of struct shash_desc member") into this change, to keep ABI compatibility] Change-Id: I3749f8de65ef3619724e68a9affb4eefd1ebe737 Signed-off-by: Jaewon Kim Signed-off-by: Zhenhua Huang --- arch/arm64/include/asm/cache.h | 19 +++++++++++++------ include/crypto/hash.h | 2 +- include/linux/slab.h | 12 ++++++++++++ mm/slab.c | 7 +++---- mm/slab_common.c | 3 +-- mm/slob.c | 16 +++++++++++----- 6 files changed, 41 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a074459f8f2f..7c2181c72116 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -6,6 +6,7 @@ #define __ASM_CACHE_H #include +#include #define CTR_L1IP_SHIFT 14 #define CTR_L1IP_MASK 3 @@ -49,15 +50,21 @@ */ #define ARCH_DMA_MINALIGN (128) -#ifdef CONFIG_KASAN_SW_TAGS -#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) -#elif defined(CONFIG_KASAN_HW_TAGS) -#define ARCH_SLAB_MINALIGN MTE_GRANULE_SIZE -#endif - #ifndef __ASSEMBLY__ #include +#include + +#ifdef CONFIG_KASAN_SW_TAGS +#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) +#elif defined(CONFIG_KASAN_HW_TAGS) +static inline unsigned int arch_slab_minalign(void) +{ + return kasan_hw_tags_enabled() ? 
MTE_GRANULE_SIZE : + __alignof__(unsigned long long); +} +#define arch_slab_minalign() arch_slab_minalign() +#endif #define ICACHEF_ALIASING 0 #define ICACHEF_VPIPT 1 diff --git a/include/crypto/hash.h b/include/crypto/hash.h index b2bc1e46e86a..14d75caa233d 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -149,7 +149,7 @@ struct ahash_alg { struct shash_desc { struct crypto_shash *tfm; - void *__ctx[] __aligned(ARCH_SLAB_MINALIGN); + void *__ctx[] __aligned(UL(16)); }; #define HASH_MAX_DIGESTSIZE 64 diff --git a/include/linux/slab.h b/include/linux/slab.h index dd6897f62010..36217130c3ae 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -217,6 +217,18 @@ static inline void __check_heap_object(const void *ptr, unsigned long n, #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif +/* + * Arches can define this function if they want to decide the minimum slab + * alignment at runtime. The value returned by the function must be a power + * of two and >= ARCH_SLAB_MINALIGN. + */ +#ifndef arch_slab_minalign +static inline unsigned int arch_slab_minalign(void) +{ + return ARCH_SLAB_MINALIGN; +} +#endif + /* * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned * pointers. 
kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN diff --git a/mm/slab.c b/mm/slab.c index 731b868a65c8..aa4ef18ddfb6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3016,10 +3016,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); - if (ARCH_SLAB_MINALIGN && - ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { - pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n", - objp, (int)ARCH_SLAB_MINALIGN); + if ((unsigned long)objp & (arch_slab_minalign() - 1)) { + pr_err("0x%px: not aligned to arch_slab_minalign()=%u\n", objp, + arch_slab_minalign()); } return objp; } diff --git a/mm/slab_common.c b/mm/slab_common.c index c751b18f7e60..05135ebb6159 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -155,8 +155,7 @@ static unsigned int calculate_alignment(slab_flags_t flags, align = max(align, ralign); } - if (align < ARCH_SLAB_MINALIGN) - align = ARCH_SLAB_MINALIGN; + align = max(align, arch_slab_minalign()); return ALIGN(align, sizeof(void *)); } diff --git a/mm/slob.c b/mm/slob.c index 7cc9805c8091..37072a7d2f42 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -469,9 +469,11 @@ static __always_inline void * __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) { unsigned int *m; - int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + unsigned int minalign; void *ret; + minalign = max_t(unsigned int, ARCH_KMALLOC_MINALIGN, + arch_slab_minalign()); gfp &= gfp_allowed_mask; fs_reclaim_acquire(gfp); @@ -485,7 +487,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) * kmalloc()'d objects. 
*/ if (is_power_of_2(size)) - align = max(minalign, (int) size); + align = max_t(unsigned int, minalign, size); if (!size) return ZERO_SIZE_PTR; @@ -547,8 +549,11 @@ void kfree(const void *block) sp = virt_to_page(block); if (PageSlab(sp)) { - int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + unsigned int align = max_t(unsigned int, + ARCH_KMALLOC_MINALIGN, + arch_slab_minalign()); unsigned int *m = (unsigned int *)(block - align); + slob_free(m, *m + align); } else { unsigned int order = compound_order(sp); @@ -564,7 +569,7 @@ EXPORT_SYMBOL(kfree); size_t __ksize(const void *block) { struct page *sp; - int align; + unsigned int align; unsigned int *m; BUG_ON(!block); @@ -575,7 +580,8 @@ size_t __ksize(const void *block) if (unlikely(!PageSlab(sp))) return page_size(sp); - align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + align = max_t(unsigned int, ARCH_KMALLOC_MINALIGN, + arch_slab_minalign()); m = (unsigned int *)(block - align); return SLOB_UNITS(*m) * SLOB_UNIT; } From e12e360999d84e5e56952c6c027803f662567d76 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 18 Jan 2023 15:49:24 +0000 Subject: [PATCH 1328/1477] ANDROID: usb: f_accessory: Check buffer size when initialised via composite When communicating with accessory devices via USBFS, the initialisation call-stack looks like: ConfigFS > Gadget ConfigFS > UDC > Gadget ConfigFS > Composite Eventually ending up in composite_dev_prepare() where memory for the data buffer is allocated and initialised. The default size used for the allocation is USB_COMP_EP0_BUFSIZ (4k). When handling bulk transfers, acc_ctrlrequest() needs to be able to handle buffers up to BULK_BUFFER_SIZE (16k). Instead of adding new generic attributes to 'struct usb_request' to track the size of the allocated buffer, we can simply split off the affected thread of execution to travel via a knowledgeable abstracted function acc_ctrlrequest_composite() where we can complete the necessary specific checks. 
Bug: 264029575 Signed-off-by: Lee Jones Change-Id: Ia1280f85499621d3fa57f7262b4a2c80f4be7773 --- drivers/usb/gadget/configfs.c | 4 ++-- drivers/usb/gadget/function/f_accessory.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 21dd268175c6..b6f86463a26e 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -16,7 +16,7 @@ #include #ifdef CONFIG_USB_CONFIGFS_F_ACC -extern int acc_ctrlrequest(struct usb_composite_dev *cdev, +extern int acc_ctrlrequest_composite(struct usb_composite_dev *cdev, const struct usb_ctrlrequest *ctrl); void acc_disconnect(void); #endif @@ -1572,7 +1572,7 @@ static int android_setup(struct usb_gadget *gadget, #ifdef CONFIG_USB_CONFIGFS_F_ACC if (value < 0) - value = acc_ctrlrequest(cdev, c); + value = acc_ctrlrequest_composite(cdev, c); #endif if (value < 0) diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index 3510f6d39f0c..7d35d6c58e47 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -1085,6 +1085,26 @@ err: } EXPORT_SYMBOL_GPL(acc_ctrlrequest); +int acc_ctrlrequest_composite(struct usb_composite_dev *cdev, + const struct usb_ctrlrequest *ctrl) +{ + u16 w_length = le16_to_cpu(ctrl->wLength); + + if (w_length > USB_COMP_EP0_BUFSIZ) { + if (ctrl->bRequestType & USB_DIR_IN) { + /* Cast away the const, we are going to overwrite on purpose. 
*/ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ); + w_length = USB_COMP_EP0_BUFSIZ; + } else { + return -EINVAL; + } + } + return acc_ctrlrequest(cdev, ctrl); +} +EXPORT_SYMBOL_GPL(acc_ctrlrequest_composite); + static int __acc_function_bind(struct usb_configuration *c, struct usb_function *f, bool configfs) From 75d066563906b46e5960369be698032df1dea03a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 5 Nov 2021 16:50:45 +0000 Subject: [PATCH 1329/1477] BACKPORT: arm64/bpf: Remove 128MB limit for BPF JIT programs (Backport: resolve conflicts due to missing f4693c2716b35 and also drop in_bpf_jit from fixup_exception the same way 5.15 backport 9c82ce593626 does it.) Commit 91fc957c9b1d ("arm64/bpf: don't allocate BPF JIT programs in module memory") restricts BPF JIT program allocation to a 128MB region to ensure BPF programs are still in branching range of each other. However this restriction should not apply to the aarch64 JIT, since BPF_JMP | BPF_CALL are implemented as a 64-bit move into a register and then a BLR instruction - which has the effect of being able to call anything without proximity limitation. The practical reason to relax this restriction on JIT memory is that 128MB of JIT memory can be quickly exhausted, especially where PAGE_SIZE is 64KB - one page is needed per program. In cases where seccomp filters are applied to multiple VMs on VM launch - such filters are classic BPF but converted to BPF - this can severely limit the number of VMs that can be launched. In a world where we support BPF JIT always on, turning off the JIT isn't always an option either. 
Fixes: 91fc957c9b1d ("arm64/bpf: don't allocate BPF JIT programs in module memory") Suggested-by: Ard Biesheuvel Signed-off-by: Russell King Signed-off-by: Daniel Borkmann Tested-by: Alan Maguire Link: https://lore.kernel.org/bpf/1636131046-5982-2-git-send-email-alan.maguire@oracle.com (cherry picked from commit b89ddf4cca43f1269093942cf5c4e457fd45c335) Bug: 252919296 Change-Id: Iec7d0b2bba001df94c2e21fcd5883ff002111cd5 Signed-off-by: Andrey Konovalov --- arch/arm64/include/asm/extable.h | 9 --------- arch/arm64/include/asm/memory.h | 5 +---- arch/arm64/kernel/traps.c | 2 +- arch/arm64/mm/extable.c | 13 +++++++++---- arch/arm64/mm/ptdump.c | 2 -- arch/arm64/net/bpf_jit_comp.c | 7 ++----- 6 files changed, 13 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index b15eb4a3e6b2..840a35ed92ec 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -22,15 +22,6 @@ struct exception_table_entry #define ARCH_HAS_RELATIVE_EXTABLE -static inline bool in_bpf_jit(struct pt_regs *regs) -{ - if (!IS_ENABLED(CONFIG_BPF_JIT)) - return false; - - return regs->pc >= BPF_JIT_REGION_START && - regs->pc < BPF_JIT_REGION_END; -} - #ifdef CONFIG_BPF_JIT int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index b864fdb79df0..a1f0e752d1cd 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -44,11 +44,8 @@ #define _PAGE_OFFSET(va) (-(UL(1) << (va))) #define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS)) #define KIMAGE_VADDR (MODULES_END) -#define BPF_JIT_REGION_START (KASAN_SHADOW_END) -#define BPF_JIT_REGION_SIZE (SZ_128M) -#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) -#define MODULES_VADDR (BPF_JIT_REGION_END) +#define MODULES_VADDR (KASAN_SHADOW_END) #define MODULES_VSIZE 
(SZ_128M) #define VMEMMAP_START (-VMEMMAP_SIZE - SZ_2M) #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index acc7e7647cbb..a6ba4362e36d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -931,7 +931,7 @@ static struct break_hook bug_break_hook = { static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr) { pr_err("%s generated an invalid instruction at %pS!\n", - in_bpf_jit(regs) ? "BPF JIT" : "Kernel text patching", + "Kernel text patching", (void *)instruction_pointer(regs)); /* We cannot handle this */ diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index aa0060178343..60a8b6a8a42b 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -9,14 +9,19 @@ int fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fixup; + unsigned long addr; - fixup = search_exception_tables(instruction_pointer(regs)); + addr = instruction_pointer(regs); + + /* Search the BPF tables first, these are formatted differently */ + fixup = search_bpf_extables(addr); + if (fixup) + return arm64_bpf_fixup_exception(fixup, regs); + + fixup = search_exception_tables(addr); if (!fixup) return 0; - if (in_bpf_jit(regs)) - return arm64_bpf_fixup_exception(fixup, regs); - regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; return 1; } diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 04137a8f3d2d..bf8ddeac5d8f 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -41,8 +41,6 @@ static struct addr_marker address_markers[] = { { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, { KASAN_SHADOW_END, "Kasan shadow end" }, #endif - { BPF_JIT_REGION_START, "BPF start" }, - { BPF_JIT_REGION_END, "BPF end" }, { MODULES_VADDR, "Modules start" }, { MODULES_END, "Modules end" }, { VMALLOC_START, "vmalloc() area" }, diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 
9097e90d07e2..86a252da69ab 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1148,15 +1148,12 @@ out: u64 bpf_jit_alloc_exec_limit(void) { - return BPF_JIT_REGION_SIZE; + return VMALLOC_END - VMALLOC_START; } void *bpf_jit_alloc_exec(unsigned long size) { - return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, - BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + return vmalloc(size); } void bpf_jit_free_exec(void *addr) From d30de909327e561759a700c4c13c53bc16f89dc3 Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Tue, 7 Sep 2021 20:53:43 +0000 Subject: [PATCH 1330/1477] ANDROID: add TEST_MAPPING for net/, include/net Run CtsNetTestCases in presubmit for changes in net/ and include/net/. Bug: 186664401 Change-Id: I3cf942bd0418ad55a6559d6933927b0da86da595 Signed-off-by: Steve Muckle (cherry picked from commit 55415d7022a8835d8979e09a74da3df29c9091ec) --- include/net/TEST_MAPPING | 7 +++++++ net/TEST_MAPPING | 7 +++++++ 2 files changed, 14 insertions(+) create mode 100644 include/net/TEST_MAPPING create mode 100644 net/TEST_MAPPING diff --git a/include/net/TEST_MAPPING b/include/net/TEST_MAPPING new file mode 100644 index 000000000000..c8614a6ad3d2 --- /dev/null +++ b/include/net/TEST_MAPPING @@ -0,0 +1,7 @@ +{ + "presubmit": [ + { + "name": "CtsNetTestCases" + } + ] +} diff --git a/net/TEST_MAPPING b/net/TEST_MAPPING new file mode 100644 index 000000000000..c8614a6ad3d2 --- /dev/null +++ b/net/TEST_MAPPING @@ -0,0 +1,7 @@ +{ + "presubmit": [ + { + "name": "CtsNetTestCases" + } + ] +} From 283eb356fdb026a43e44036d2a3f6d7a5071450d Mon Sep 17 00:00:00 2001 From: Dezhi Huang Date: Tue, 27 Dec 2022 10:59:11 +0800 Subject: [PATCH 1331/1477] ANDROID: GKI: add symbol list file for honor add abi_gki_aarch64_honor Leaf changes summary: 2 artifacts changed Changed leaf types summary: 0 leaf type changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 1 Added functions 
Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 1 Added variables 1 Added functions: [A] 'function int __traceiter_android_rvh_dma_buf_stats_teardown(void*, dma_buf_sysfs_entry*, bool*)' 1 Added variables: [A] 'tracepoint __tracepoint_android_rvh_dma_buf_stats_teardown' Bug: 254192604 Change-Id: I1bb1c6be5c3d5da56563f668132d18184be094eb Signed-off-by: Dezhi Huang Signed-off-by: Giuliano Procida --- android/abi_gki_aarch64.xml | 9 +++++++++ android/abi_gki_aarch64_honor | 3 +++ build.config.gki.aarch64 | 1 + 3 files changed, 13 insertions(+) create mode 100644 android/abi_gki_aarch64_honor diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 9fb761ab3b74..e5beb2b22443 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -322,6 +322,7 @@ + @@ -6310,6 +6311,7 @@ + @@ -118047,6 +118049,12 @@ + + + + + + @@ -120626,6 +120634,7 @@ + diff --git a/android/abi_gki_aarch64_honor b/android/abi_gki_aarch64_honor new file mode 100644 index 000000000000..a1c85ea34e54 --- /dev/null +++ b/android/abi_gki_aarch64_honor @@ -0,0 +1,3 @@ +[abi_symbol_list] + __traceiter_android_rvh_dma_buf_stats_teardown + __tracepoint_android_rvh_dma_buf_stats_teardown diff --git a/build.config.gki.aarch64 b/build.config.gki.aarch64 index 70f439aacafe..6486341b9aef 100644 --- a/build.config.gki.aarch64 +++ b/build.config.gki.aarch64 @@ -19,6 +19,7 @@ android/abi_gki_aarch64_fips140 android/abi_gki_aarch64_galaxy android/abi_gki_aarch64_generic android/abi_gki_aarch64_hikey960 +android/abi_gki_aarch64_honor android/abi_gki_aarch64_imx android/abi_gki_aarch64_lenovo android/abi_gki_aarch64_mtk From e973de77ad37a8506e36dad6e8a6689c98078504 Mon Sep 17 00:00:00 2001 From: Maxim Devaev Date: Sat, 21 Aug 2021 16:40:04 +0300 Subject: [PATCH 1332/1477] UPSTREAM: usb: gadget: f_hid: optional SETUP/SET_REPORT mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 
d7428bc26fc767942c38d74b80299bcd4f01e7cb ] f_hid provides the OUT Endpoint as only way for receiving reports from the host. SETUP/SET_REPORT method is not supported, and this causes a number of compatibility problems with various host drivers, especially in the case of keyboard emulation using f_hid. - Some hosts do not support the OUT Endpoint and ignore it, so it becomes impossible for the gadget to receive a report from the host. In the case of a keyboard, the gadget loses the ability to receive the status of the LEDs. - Some BIOSes/UEFIs can't work with HID devices with the OUT Endpoint at all. This may be due to their bugs or incomplete implementation of the HID standard. For example, absolutely all Apple UEFIs can't handle the OUT Endpoint if it goes after IN Endpoint in the descriptor and require the reverse order (OUT, IN) which is a violation of the standard. Other hosts either do not initialize gadgets with a descriptor containing the OUT Endpoint completely (like some HP and DELL BIOSes and embedded firmwares like on KVM switches), or initialize them, but will not poll the IN Endpoint. This patch adds configfs option no_out_endpoint=1 to disable the OUT Endpoint and allows f_hid to receive reports from the host via SETUP/SET_REPORT. Previously, there was such a feature in f_hid, but it was replaced by the OUT Endpoint [1] in the commit 99c515005857 ("usb: gadget: hidg: register OUT INT endpoint for SET_REPORT"). So this patch actually returns the removed functionality while making it optional. For backward compatibility reasons, the OUT Endpoint mode remains the default behaviour. - The OUT Endpoint mode provides the report queue and reduces USB overhead (eliminating SETUP routine) on transmitting a report from the host. - If the SETUP/SET_REPORT mode is used, there is no report queue, so the userspace will only read last report. 
For classic HID devices like keyboards this is not a problem, since it's intended to transmit the status of the LEDs and only the last report is important. This mode provides better compatibility with strange and buggy host drivers. Both modes passed USBCV tests. Checking with the USB protocol analyzer also confirmed that everything is working as it should and the new mode ensures operability in all of the described cases. Bug: 176850153 Link: https://www.spinics.net/lists/linux-usb/msg65494.html [1] Reviewed-by: Maciej Żenczykowski Acked-by: Felipe Balbi Signed-off-by: Maxim Devaev Link: https://lore.kernel.org/r/20210821134004.363217-1-mdevaev@gmail.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 89ff3dfac604 ("usb: gadget: f_hid: fix f_hidg lifetime vs cdev") Signed-off-by: Sasha Levin Signed-off-by: Lee Jones Change-Id: I0f5759015f17661fe4fe23a1216f1047978c79b1 Signed-off-by: Lee Jones --- drivers/usb/gadget/function/f_hid.c | 220 +++++++++++++++++++++++----- drivers/usb/gadget/function/u_hid.h | 1 + 2 files changed, 188 insertions(+), 33 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 6742271cd6e6..8cb199f52b52 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -45,12 +45,25 @@ struct f_hidg { unsigned short report_desc_length; char *report_desc; unsigned short report_length; + /* + * use_out_ep - if true, the OUT Endpoint (interrupt out method) + * will be used to receive reports from the host + * using functions with the "intout" suffix. + * Otherwise, the OUT Endpoint will not be configured + * and the SETUP/SET_REPORT method ("ssreport" suffix) + * will be used to receive reports. 
+ */ + bool use_out_ep; /* recv report */ - struct list_head completed_out_req; spinlock_t read_spinlock; wait_queue_head_t read_queue; + /* recv report - interrupt out only (use_out_ep == 1) */ + struct list_head completed_out_req; unsigned int qlen; + /* recv report - setup set_report only (use_out_ep == 0) */ + char *set_report_buf; + unsigned int set_report_length; /* send report */ spinlock_t write_spinlock; @@ -79,7 +92,7 @@ static struct usb_interface_descriptor hidg_interface_desc = { .bDescriptorType = USB_DT_INTERFACE, /* .bInterfaceNumber = DYNAMIC */ .bAlternateSetting = 0, - .bNumEndpoints = 2, + /* .bNumEndpoints = DYNAMIC (depends on use_out_ep) */ .bInterfaceClass = USB_CLASS_HID, /* .bInterfaceSubClass = DYNAMIC */ /* .bInterfaceProtocol = DYNAMIC */ @@ -140,7 +153,7 @@ static struct usb_ss_ep_comp_descriptor hidg_ss_out_comp_desc = { /* .wBytesPerInterval = DYNAMIC */ }; -static struct usb_descriptor_header *hidg_ss_descriptors[] = { +static struct usb_descriptor_header *hidg_ss_descriptors_intout[] = { (struct usb_descriptor_header *)&hidg_interface_desc, (struct usb_descriptor_header *)&hidg_desc, (struct usb_descriptor_header *)&hidg_ss_in_ep_desc, @@ -150,6 +163,14 @@ static struct usb_descriptor_header *hidg_ss_descriptors[] = { NULL, }; +static struct usb_descriptor_header *hidg_ss_descriptors_ssreport[] = { + (struct usb_descriptor_header *)&hidg_interface_desc, + (struct usb_descriptor_header *)&hidg_desc, + (struct usb_descriptor_header *)&hidg_ss_in_ep_desc, + (struct usb_descriptor_header *)&hidg_ss_in_comp_desc, + NULL, +}; + /* High-Speed Support */ static struct usb_endpoint_descriptor hidg_hs_in_ep_desc = { @@ -176,7 +197,7 @@ static struct usb_endpoint_descriptor hidg_hs_out_ep_desc = { */ }; -static struct usb_descriptor_header *hidg_hs_descriptors[] = { +static struct usb_descriptor_header *hidg_hs_descriptors_intout[] = { (struct usb_descriptor_header *)&hidg_interface_desc, (struct usb_descriptor_header *)&hidg_desc, (struct 
usb_descriptor_header *)&hidg_hs_in_ep_desc, @@ -184,6 +205,13 @@ static struct usb_descriptor_header *hidg_hs_descriptors[] = { NULL, }; +static struct usb_descriptor_header *hidg_hs_descriptors_ssreport[] = { + (struct usb_descriptor_header *)&hidg_interface_desc, + (struct usb_descriptor_header *)&hidg_desc, + (struct usb_descriptor_header *)&hidg_hs_in_ep_desc, + NULL, +}; + /* Full-Speed Support */ static struct usb_endpoint_descriptor hidg_fs_in_ep_desc = { @@ -210,7 +238,7 @@ static struct usb_endpoint_descriptor hidg_fs_out_ep_desc = { */ }; -static struct usb_descriptor_header *hidg_fs_descriptors[] = { +static struct usb_descriptor_header *hidg_fs_descriptors_intout[] = { (struct usb_descriptor_header *)&hidg_interface_desc, (struct usb_descriptor_header *)&hidg_desc, (struct usb_descriptor_header *)&hidg_fs_in_ep_desc, @@ -218,6 +246,13 @@ static struct usb_descriptor_header *hidg_fs_descriptors[] = { NULL, }; +static struct usb_descriptor_header *hidg_fs_descriptors_ssreport[] = { + (struct usb_descriptor_header *)&hidg_interface_desc, + (struct usb_descriptor_header *)&hidg_desc, + (struct usb_descriptor_header *)&hidg_fs_in_ep_desc, + NULL, +}; + /*-------------------------------------------------------------------------*/ /* Strings */ @@ -241,8 +276,8 @@ static struct usb_gadget_strings *ct_func_strings[] = { /*-------------------------------------------------------------------------*/ /* Char Device */ -static ssize_t f_hidg_read(struct file *file, char __user *buffer, - size_t count, loff_t *ptr) +static ssize_t f_hidg_intout_read(struct file *file, char __user *buffer, + size_t count, loff_t *ptr) { struct f_hidg *hidg = file->private_data; struct f_hidg_req_list *list; @@ -255,15 +290,15 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, spin_lock_irqsave(&hidg->read_spinlock, flags); -#define READ_COND (!list_empty(&hidg->completed_out_req)) +#define READ_COND_INTOUT (!list_empty(&hidg->completed_out_req)) /* wait for at 
least one buffer to complete */ - while (!READ_COND) { + while (!READ_COND_INTOUT) { spin_unlock_irqrestore(&hidg->read_spinlock, flags); if (file->f_flags & O_NONBLOCK) return -EAGAIN; - if (wait_event_interruptible(hidg->read_queue, READ_COND)) + if (wait_event_interruptible(hidg->read_queue, READ_COND_INTOUT)) return -ERESTARTSYS; spin_lock_irqsave(&hidg->read_spinlock, flags); @@ -313,6 +348,60 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer, return count; } +#define READ_COND_SSREPORT (hidg->set_report_buf != NULL) + +static ssize_t f_hidg_ssreport_read(struct file *file, char __user *buffer, + size_t count, loff_t *ptr) +{ + struct f_hidg *hidg = file->private_data; + char *tmp_buf = NULL; + unsigned long flags; + + if (!count) + return 0; + + spin_lock_irqsave(&hidg->read_spinlock, flags); + + while (!READ_COND_SSREPORT) { + spin_unlock_irqrestore(&hidg->read_spinlock, flags); + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(hidg->read_queue, READ_COND_SSREPORT)) + return -ERESTARTSYS; + + spin_lock_irqsave(&hidg->read_spinlock, flags); + } + + count = min_t(unsigned int, count, hidg->set_report_length); + tmp_buf = hidg->set_report_buf; + hidg->set_report_buf = NULL; + + spin_unlock_irqrestore(&hidg->read_spinlock, flags); + + if (tmp_buf != NULL) { + count -= copy_to_user(buffer, tmp_buf, count); + kfree(tmp_buf); + } else { + count = -ENOMEM; + } + + wake_up(&hidg->read_queue); + + return count; +} + +static ssize_t f_hidg_read(struct file *file, char __user *buffer, + size_t count, loff_t *ptr) +{ + struct f_hidg *hidg = file->private_data; + + if (hidg->use_out_ep) + return f_hidg_intout_read(file, buffer, count, ptr); + else + return f_hidg_ssreport_read(file, buffer, count, ptr); +} + static void f_hidg_req_complete(struct usb_ep *ep, struct usb_request *req) { struct f_hidg *hidg = (struct f_hidg *)ep->driver_data; @@ -433,14 +522,20 @@ static __poll_t f_hidg_poll(struct file *file, poll_table 
*wait) if (WRITE_COND) ret |= EPOLLOUT | EPOLLWRNORM; - if (READ_COND) - ret |= EPOLLIN | EPOLLRDNORM; + if (hidg->use_out_ep) { + if (READ_COND_INTOUT) + ret |= EPOLLIN | EPOLLRDNORM; + } else { + if (READ_COND_SSREPORT) + ret |= EPOLLIN | EPOLLRDNORM; + } return ret; } #undef WRITE_COND -#undef READ_COND +#undef READ_COND_SSREPORT +#undef READ_COND_INTOUT static int f_hidg_release(struct inode *inode, struct file *fd) { @@ -467,7 +562,7 @@ static inline struct usb_request *hidg_alloc_ep_req(struct usb_ep *ep, return alloc_ep_req(ep, length); } -static void hidg_set_report_complete(struct usb_ep *ep, struct usb_request *req) +static void hidg_intout_complete(struct usb_ep *ep, struct usb_request *req) { struct f_hidg *hidg = (struct f_hidg *) req->context; struct usb_composite_dev *cdev = hidg->func.config->cdev; @@ -502,6 +597,37 @@ free_req: } } +static void hidg_ssreport_complete(struct usb_ep *ep, struct usb_request *req) +{ + struct f_hidg *hidg = (struct f_hidg *)req->context; + struct usb_composite_dev *cdev = hidg->func.config->cdev; + char *new_buf = NULL; + unsigned long flags; + + if (req->status != 0 || req->buf == NULL || req->actual == 0) { + ERROR(cdev, + "%s FAILED: status=%d, buf=%p, actual=%d\n", + __func__, req->status, req->buf, req->actual); + return; + } + + spin_lock_irqsave(&hidg->read_spinlock, flags); + + new_buf = krealloc(hidg->set_report_buf, req->actual, GFP_ATOMIC); + if (new_buf == NULL) { + spin_unlock_irqrestore(&hidg->read_spinlock, flags); + return; + } + hidg->set_report_buf = new_buf; + + hidg->set_report_length = req->actual; + memcpy(hidg->set_report_buf, req->buf, req->actual); + + spin_unlock_irqrestore(&hidg->read_spinlock, flags); + + wake_up(&hidg->read_queue); +} + static int hidg_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) { @@ -549,7 +675,11 @@ static int hidg_setup(struct usb_function *f, case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 | HID_REQ_SET_REPORT): VDBG(cdev, 
"set_report | wLength=%d\n", ctrl->wLength); - goto stall; + if (hidg->use_out_ep) + goto stall; + req->complete = hidg_ssreport_complete; + req->context = hidg; + goto respond; break; case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 @@ -637,15 +767,18 @@ static void hidg_disable(struct usb_function *f) unsigned long flags; usb_ep_disable(hidg->in_ep); - usb_ep_disable(hidg->out_ep); - spin_lock_irqsave(&hidg->read_spinlock, flags); - list_for_each_entry_safe(list, next, &hidg->completed_out_req, list) { - free_ep_req(hidg->out_ep, list->req); - list_del(&list->list); - kfree(list); + if (hidg->out_ep) { + usb_ep_disable(hidg->out_ep); + + spin_lock_irqsave(&hidg->read_spinlock, flags); + list_for_each_entry_safe(list, next, &hidg->completed_out_req, list) { + free_ep_req(hidg->out_ep, list->req); + list_del(&list->list); + kfree(list); + } + spin_unlock_irqrestore(&hidg->read_spinlock, flags); } - spin_unlock_irqrestore(&hidg->read_spinlock, flags); spin_lock_irqsave(&hidg->write_spinlock, flags); if (!hidg->write_pending) { @@ -691,8 +824,7 @@ static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) } } - - if (hidg->out_ep != NULL) { + if (hidg->use_out_ep && hidg->out_ep != NULL) { /* restart endpoint */ usb_ep_disable(hidg->out_ep); @@ -717,7 +849,7 @@ static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) hidg_alloc_ep_req(hidg->out_ep, hidg->report_length); if (req) { - req->complete = hidg_set_report_complete; + req->complete = hidg_intout_complete; req->context = hidg; status = usb_ep_queue(hidg->out_ep, req, GFP_ATOMIC); @@ -743,7 +875,8 @@ static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) } return 0; disable_out_ep: - usb_ep_disable(hidg->out_ep); + if (hidg->out_ep) + usb_ep_disable(hidg->out_ep); free_req_in: if (req_in) free_ep_req(hidg->in_ep, req_in); @@ -795,14 +928,21 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) goto fail; 
hidg->in_ep = ep; - ep = usb_ep_autoconfig(c->cdev->gadget, &hidg_fs_out_ep_desc); - if (!ep) - goto fail; - hidg->out_ep = ep; + hidg->out_ep = NULL; + if (hidg->use_out_ep) { + ep = usb_ep_autoconfig(c->cdev->gadget, &hidg_fs_out_ep_desc); + if (!ep) + goto fail; + hidg->out_ep = ep; + } + + /* used only if use_out_ep == 0 */ + hidg->set_report_buf = NULL; /* set descriptor dynamic values */ hidg_interface_desc.bInterfaceSubClass = hidg->bInterfaceSubClass; hidg_interface_desc.bInterfaceProtocol = hidg->bInterfaceProtocol; + hidg_interface_desc.bNumEndpoints = hidg->use_out_ep ? 2 : 1; hidg->protocol = HID_REPORT_PROTOCOL; hidg->idle = 1; hidg_ss_in_ep_desc.wMaxPacketSize = cpu_to_le16(hidg->report_length); @@ -833,9 +973,19 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) hidg_ss_out_ep_desc.bEndpointAddress = hidg_fs_out_ep_desc.bEndpointAddress; - status = usb_assign_descriptors(f, hidg_fs_descriptors, - hidg_hs_descriptors, hidg_ss_descriptors, - hidg_ss_descriptors); + if (hidg->use_out_ep) + status = usb_assign_descriptors(f, + hidg_fs_descriptors_intout, + hidg_hs_descriptors_intout, + hidg_ss_descriptors_intout, + hidg_ss_descriptors_intout); + else + status = usb_assign_descriptors(f, + hidg_fs_descriptors_ssreport, + hidg_hs_descriptors_ssreport, + hidg_ss_descriptors_ssreport, + hidg_ss_descriptors_ssreport); + if (status) goto fail; @@ -950,6 +1100,7 @@ CONFIGFS_ATTR(f_hid_opts_, name) F_HID_OPT(subclass, 8, 255); F_HID_OPT(protocol, 8, 255); +F_HID_OPT(no_out_endpoint, 8, 1); F_HID_OPT(report_length, 16, 65535); static ssize_t f_hid_opts_report_desc_show(struct config_item *item, char *page) @@ -1009,6 +1160,7 @@ CONFIGFS_ATTR_RO(f_hid_opts_, dev); static struct configfs_attribute *hid_attrs[] = { &f_hid_opts_attr_subclass, &f_hid_opts_attr_protocol, + &f_hid_opts_attr_no_out_endpoint, &f_hid_opts_attr_report_length, &f_hid_opts_attr_report_desc, &f_hid_opts_attr_dev, @@ -1093,6 +1245,7 @@ static void hidg_free(struct
usb_function *f) hidg = func_to_hidg(f); opts = container_of(f->fi, struct f_hid_opts, func_inst); kfree(hidg->report_desc); + kfree(hidg->set_report_buf); kfree(hidg); mutex_lock(&opts->lock); --opts->refcnt; @@ -1139,6 +1292,7 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) return ERR_PTR(-ENOMEM); } } + hidg->use_out_ep = !opts->no_out_endpoint; mutex_unlock(&opts->lock); diff --git a/drivers/usb/gadget/function/u_hid.h b/drivers/usb/gadget/function/u_hid.h index 84e6da302499..fa631f34bb3d 100644 --- a/drivers/usb/gadget/function/u_hid.h +++ b/drivers/usb/gadget/function/u_hid.h @@ -20,6 +20,7 @@ struct f_hid_opts { int minor; unsigned char subclass; unsigned char protocol; + unsigned char no_out_endpoint; unsigned short report_length; unsigned short report_desc_length; unsigned char *report_desc; From 56c8a404365f8d4733f82456bea32b4f832e0954 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 22 Nov 2022 12:35:21 +0000 Subject: [PATCH 1333/1477] UPSTREAM: usb: gadget: f_hid: fix f_hidg lifetime vs cdev [ Upstream commit 89ff3dfac604614287ad5aad9370c3f984ea3f4b ] The embedded struct cdev does not have its lifetime correctly tied to the enclosing struct f_hidg, so there is a use-after-free if /dev/hidgN is held open while the gadget is deleted. This can readily be replicated with libusbgx's example programs (for conciseness - operating directly via configfs is equivalent): gadget-hid exec 3<> /dev/hidg0 gadget-vid-pid-remove exec 3<&- Pull the existing device up in to struct f_hidg and make use of the cdev_device_{add,del}() helpers. This changes the lifetime of the device object to match struct f_hidg, but note that it is still added and deleted at the same time. 
Bug: 176850153 Fixes: 71adf1189469 ("USB: gadget: add HID gadget driver") Tested-by: Lee Jones Reviewed-by: Andrzej Pietrasiewicz Reviewed-by: Lee Jones Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20221122123523.3068034-2-john@metanate.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Lee Jones Change-Id: I5d37ca47c5f087d5b1b303b4e8a1614ea3f50159 Signed-off-by: Lee Jones --- drivers/usb/gadget/function/f_hid.c | 52 ++++++++++++++++------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 8cb199f52b52..97e927eacc62 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -71,7 +71,7 @@ struct f_hidg { wait_queue_head_t write_queue; struct usb_request *req; - int minor; + struct device dev; struct cdev cdev; struct usb_function func; @@ -84,6 +84,14 @@ static inline struct f_hidg *func_to_hidg(struct usb_function *f) return container_of(f, struct f_hidg, func); } +static void hidg_release(struct device *dev) +{ + struct f_hidg *hidg = container_of(dev, struct f_hidg, dev); + + kfree(hidg->set_report_buf); + kfree(hidg); +} + /*-------------------------------------------------------------------------*/ /* Static descriptors */ @@ -904,9 +912,7 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) struct usb_ep *ep; struct f_hidg *hidg = func_to_hidg(f); struct usb_string *us; - struct device *device; int status; - dev_t dev; /* maybe allocate device-global string IDs, and patch descriptors */ us = usb_gstrings_attach(c->cdev, ct_func_strings, @@ -999,21 +1005,11 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) /* create char device */ cdev_init(&hidg->cdev, &f_hidg_fops); - dev = MKDEV(major, hidg->minor); - status = cdev_add(&hidg->cdev, dev, 1); + status = cdev_device_add(&hidg->cdev, &hidg->dev); if (status) goto fail_free_descs; - 
device = device_create(hidg_class, NULL, dev, NULL, - "%s%d", "hidg", hidg->minor); - if (IS_ERR(device)) { - status = PTR_ERR(device); - goto del; - } - return 0; -del: - cdev_del(&hidg->cdev); fail_free_descs: usb_free_all_descriptors(f); fail: @@ -1244,9 +1240,7 @@ static void hidg_free(struct usb_function *f) hidg = func_to_hidg(f); opts = container_of(f->fi, struct f_hid_opts, func_inst); - kfree(hidg->report_desc); - kfree(hidg->set_report_buf); - kfree(hidg); + put_device(&hidg->dev); mutex_lock(&opts->lock); --opts->refcnt; mutex_unlock(&opts->lock); @@ -1256,8 +1250,7 @@ static void hidg_unbind(struct usb_configuration *c, struct usb_function *f) { struct f_hidg *hidg = func_to_hidg(f); - device_destroy(hidg_class, MKDEV(major, hidg->minor)); - cdev_del(&hidg->cdev); + cdev_device_del(&hidg->cdev, &hidg->dev); usb_free_all_descriptors(f); } @@ -1266,6 +1259,7 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) { struct f_hidg *hidg; struct f_hid_opts *opts; + int ret; /* allocate and initialize one new instance */ hidg = kzalloc(sizeof(*hidg), GFP_KERNEL); @@ -1277,17 +1271,27 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) mutex_lock(&opts->lock); ++opts->refcnt; - hidg->minor = opts->minor; + device_initialize(&hidg->dev); + hidg->dev.release = hidg_release; + hidg->dev.class = hidg_class; + hidg->dev.devt = MKDEV(major, opts->minor); + ret = dev_set_name(&hidg->dev, "hidg%d", opts->minor); + if (ret) { + --opts->refcnt; + mutex_unlock(&opts->lock); + return ERR_PTR(ret); + } + hidg->bInterfaceSubClass = opts->subclass; hidg->bInterfaceProtocol = opts->protocol; hidg->report_length = opts->report_length; hidg->report_desc_length = opts->report_desc_length; if (opts->report_desc) { - hidg->report_desc = kmemdup(opts->report_desc, - opts->report_desc_length, - GFP_KERNEL); + hidg->report_desc = devm_kmemdup(&hidg->dev, opts->report_desc, + opts->report_desc_length, + GFP_KERNEL); if (!hidg->report_desc) { 
- kfree(hidg); + put_device(&hidg->dev); mutex_unlock(&opts->lock); return ERR_PTR(-ENOMEM); } From 869cae6f2547a267014bc4e20875bc1cdc7c3493 Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Tue, 24 Jan 2023 14:41:49 +0530 Subject: [PATCH 1334/1477] UPSTREAM: usb: gadget: f_fs: Fix unbalanced spinlock in __ffs_ep0_queue_wait __ffs_ep0_queue_wait executes holding the spinlock of &ffs->ev.waitq.lock and unlocks it after the assignments to usb_request are done. However in the code if the request is already NULL we bail out returning -EINVAL but never unlocked the spinlock. Fix this by adding spin_unlock_irq &ffs->ev.waitq.lock before returning. Fixes: 6a19da111057 ("usb: gadget: f_fs: Prevent race during ffs_ep0_queue_wait") Reviewed-by: John Keeping Signed-off-by: Udipto Goswami Link: https://lore.kernel.org/r/20230124091149.18647-1-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman BUG: 266631160 (cherry picked from commit 921deb9da15851425ccbb6ee409dc2fd8fbdfe6b usb-linus) Change-Id: I7cd4445a6e1eee431fd1a1848b114181f4eea45c Signed-off-by: Udipto Goswami --- drivers/usb/gadget/function/f_fs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index f49f8706fbd3..53154a6fd9fb 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -279,8 +279,10 @@ static int __ffs_ep0_queue_wait(struct ffs_data *ffs, char *data, size_t len) struct usb_request *req = ffs->ep0req; int ret; - if (!req) + if (!req) { + spin_unlock_irq(&ffs->ev.waitq.lock); return -EINVAL; + } req->zero = len < le16_to_cpu(ffs->ev.setup.wLength); From fc1490c62150aa32d56d75007b49ccca51ba2fe4 Mon Sep 17 00:00:00 2001 From: Daniele Alessandrelli Date: Mon, 18 Jan 2021 16:59:04 +0000 Subject: [PATCH 1335/1477] UPSTREAM: remoteproc: core: Fix rproc->firmware free in rproc_set_firmware() rproc_alloc_firmware() (called by rproc_alloc()) can allocate rproc->firmware using 
kstrdup_const() and therefore should be freed using kfree_const(); however, rproc_set_firmware() frees it using the simple kfree(). This causes a kernel oops if a constant string is passed to rproc_alloc() and rproc_set_firmware() is subsequently called. Fix the above issue by using kfree_const() to free rproc->firmware in rproc_set_firmware(). Reviewed-by: Mathieu Poirier Signed-off-by: Daniele Alessandrelli Link: https://lore.kernel.org/r/20210118165904.719999-1-daniele.alessandrelli@linux.intel.com Signed-off-by: Bjorn Andersson [juhyung: A kernel panic was observed with downstream Qualcomm's qcom_spss and spss_utils modules on production phones. drivers/remoteproc/qcom_spss.c's qcom_spss_set_fw_name() have upstream's rproc_set_firmware() logic duplicated and hence needs its own fix (change to call rproc_set_firmware() directly) to fully fix the kernel panic, but remoteproc_core needs to be fixed too.] Change-Id: I7d993e38d4babc30562dd47c366cba9fda388130 (cherry picked from commit 43d3f2c715cefcfb89b10675728e9bf0d8bb98e3) Signed-off-by: Juhyung Park --- drivers/remoteproc/remoteproc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 83cd040a2993..45cc99d42765 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -2000,7 +2000,7 @@ int rproc_set_firmware(struct rproc *rproc, const char *fw_name) goto out; } - kfree(rproc->firmware); + kfree_const(rproc->firmware); rproc->firmware = p; out: From 8cf3c254952c177eea7c30f005ae057b3cc5a9e1 Mon Sep 17 00:00:00 2001 From: SEO HOYOUNG Date: Fri, 7 Jan 2022 06:39:24 +0900 Subject: [PATCH 1336/1477] FROMGIT: scsi: ufs: Modify Tactive time setting conditions The Tactive time determines the waiting time before burst at hibern8 exit and is determined by hardware at linkup time. However, in the case of Samsung devices, increase host's Tactive time +100us for stability. 
If the HCI's Tactive time is equal or greater than the device, +100us should be set. Link: https://lore.kernel.org/r/20220106213924.186263-1-hy50.seo@samsung.com Reviewed-by: Alim Akhtar Acked-by: Avri Altman Signed-off-by: SEO HOYOUNG Signed-off-by: Martin K. Petersen (cherry picked from commit 9008661e19606bdf6dddd33073b70872da400590 git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next) Bug: 204438323 Change-Id: I6ffe1c279cab9b780558de763e94cf01cfd4be3e Signed-off-by: Bart Van Assche Signed-off-by: Byunghoon Kim --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index ea90dec13695..1812edbb7d72 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7651,7 +7651,7 @@ static int ufshcd_quirk_tune_host_pa_tactivate(struct ufs_hba *hba) peer_pa_tactivate_us = peer_pa_tactivate * gran_to_us_table[peer_granularity - 1]; - if (pa_tactivate_us > peer_pa_tactivate_us) { + if (pa_tactivate_us >= peer_pa_tactivate_us) { u32 new_peer_pa_tactivate; new_peer_pa_tactivate = pa_tactivate_us / From e427004fadda5202699a1a8d4dca22ae419e2eb4 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 24 Jan 2023 16:51:27 +0000 Subject: [PATCH 1337/1477] ANDROID: Revert "ANDROID: allmodconfig: disable WERROR" This reverts commit eb57c31115051c5404d1bb1f2daec20e051b0287. This branch looks clean of WERROR warnings. Let's try to re-enable it. 
Bug: 261962742 Fixes: eb57c31115051 ("ANDROID: allmodconfig: disable WERROR") (cherry picked from commit d19f8758ae7732b14fd85e97c66dec2855d3be73) Signed-off-by: Lee Jones Change-Id: I0106dcd43d7e4b4e20ac768f3faac40285bc837b --- build.config.allmodconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/build.config.allmodconfig b/build.config.allmodconfig index e5cbe7faa60a..a48f8d420208 100644 --- a/build.config.allmodconfig +++ b/build.config.allmodconfig @@ -9,7 +9,6 @@ function update_config() { -d CPU_BIG_ENDIAN \ -d DYNAMIC_FTRACE \ -e UNWINDER_FRAME_POINTER \ - -d WERROR \ (cd ${OUT_DIR} && \ make O=${OUT_DIR} $archsubarch CROSS_COMPILE=${CROSS_COMPILE} "${TOOL_ARGS[@]}" ${MAKE_ARGS} olddefconfig) From e096145ac3a3a823e735b4a8d9633b70d1de29ee Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 25 Jan 2023 14:19:33 +0000 Subject: [PATCH 1338/1477] ANDROID: mm: page_pinner: ensure do_div() arguments matches with respect to type Fixes the following compiler warnings: mm/page_pinner.c:240:28: error: comparison of distinct pointer types ('typeof ((ts_usec)) *' (aka 'long long *') and 'uint64_t *' (aka 'unsigned long long *')) [-Werror,-Wcompare-distinct-pointer-types] unsigned long rem_usec = do_div(ts_usec, 1000000); ^~~~~~~~~~~~~~~~~~~~~~~~ include/asm-generic/div64.h:226:28: note: expanded from macro 'do_div' (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \ ~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~ mm/page_pinner.c:312:13: error: comparison of distinct pointer types ('typeof ((ts_usec)) *' (aka 'long long *') and 'uint64_t *' (aka 'unsigned long long *')) [-Werror,-Wcompare-distinct-pointer-types] rem_usec = do_div(ts_usec, 1000000); ^~~~~~~~~~~~~~~~~~~~~~~~ include/asm-generic/div64.h:226:28: note: expanded from macro 'do_div' (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \ ~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~ Bug: 261962742 Signed-off-by: Lee Jones Change-Id: I63bc6e2d82bfd757c4bf9df53a1a17a1a6235ba7 --- mm/page_pinner.c | 4 ++-- 1 file changed, 2 insertions(+), 
2 deletions(-) diff --git a/mm/page_pinner.c b/mm/page_pinner.c index 6a825777afac..a444584103ad 100644 --- a/mm/page_pinner.c +++ b/mm/page_pinner.c @@ -236,7 +236,7 @@ print_page_pinner(bool longterm, char __user *buf, size_t count, struct captured ret = snprintf(kbuf, count, "Page pinned for %lld us\n", record->elapsed); } else { - s64 ts_usec = record->ts_usec; + u64 ts_usec = record->ts_usec; unsigned long rem_usec = do_div(ts_usec, 1000000); ret = snprintf(kbuf, count, @@ -291,7 +291,7 @@ void __dump_page_pinner(struct page *page) unsigned long pfn; int count; unsigned long rem_usec; - s64 ts_usec; + u64 ts_usec; if (unlikely(!page_ext)) { pr_alert("There is not page extension available.\n"); From 79c4f55c94250c03029c35d1a13c73a97c7afc66 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 12 Mar 2021 09:47:22 +0000 Subject: [PATCH 1339/1477] UPSTREAM: scsi: bfa: Move a large struct from the stack onto the heap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/scsi/bfa/bfa_fcs_lport.c: In function ‘bfa_fcs_lport_fdmi_build_rhba_pyld’: drivers/scsi/bfa/bfa_fcs_lport.c:2152:1: warning: the frame size of 1200 bytes is larger than 1024 bytes [-Wframe-larger-than=] Bug: 261962742 Link: https://lore.kernel.org/r/20210312094738.2207817-15-lee.jones@linaro.org Cc: Anil Gurumurthy Cc: Sudarsana Kalluru Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: linux-scsi@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Martin K. 
Petersen (cherry picked from commit a7a11b6cfec2c3dc77b8206966f371dfafabea47) Signed-off-by: Lee Jones Change-Id: I9d266d1fd3546d198e163b9186f9eae836f7572a --- drivers/scsi/bfa/bfa_fcs_lport.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/bfa/bfa_fcs_lport.c b/drivers/scsi/bfa/bfa_fcs_lport.c index 3486e402bfc1..98292025b347 100644 --- a/drivers/scsi/bfa/bfa_fcs_lport.c +++ b/drivers/scsi/bfa/bfa_fcs_lport.c @@ -1408,7 +1408,7 @@ static void bfa_fcs_lport_fdmi_rpa_response(void *fcsarg, u32 resid_len, struct fchs_s *rsp_fchs); static void bfa_fcs_lport_fdmi_timeout(void *arg); -static u16 bfa_fcs_lport_fdmi_build_rhba_pyld(struct bfa_fcs_lport_fdmi_s *fdmi, +static int bfa_fcs_lport_fdmi_build_rhba_pyld(struct bfa_fcs_lport_fdmi_s *fdmi, u8 *pyld); static u16 bfa_fcs_lport_fdmi_build_rprt_pyld(struct bfa_fcs_lport_fdmi_s *fdmi, u8 *pyld); @@ -1887,6 +1887,8 @@ bfa_fcs_lport_fdmi_send_rhba(void *fdmi_cbarg, struct bfa_fcxp_s *fcxp_alloced) bfa_fcs_lport_fdmi_build_rhba_pyld(fdmi, (u8 *) ((struct ct_hdr_s *) pyld + 1)); + if (attr_len < 0) + return; bfa_fcxp_send(fcxp, NULL, port->fabric->vf_id, port->lp_tag, BFA_FALSE, FC_CLASS_3, (len + attr_len), &fchs, @@ -1896,17 +1898,20 @@ bfa_fcs_lport_fdmi_send_rhba(void *fdmi_cbarg, struct bfa_fcxp_s *fcxp_alloced) bfa_sm_send_event(fdmi, FDMISM_EVENT_RHBA_SENT); } -static u16 +static int bfa_fcs_lport_fdmi_build_rhba_pyld(struct bfa_fcs_lport_fdmi_s *fdmi, u8 *pyld) { struct bfa_fcs_lport_s *port = fdmi->ms->port; - struct bfa_fcs_fdmi_hba_attr_s hba_attr; - struct bfa_fcs_fdmi_hba_attr_s *fcs_hba_attr = &hba_attr; + struct bfa_fcs_fdmi_hba_attr_s *fcs_hba_attr; struct fdmi_rhba_s *rhba = (struct fdmi_rhba_s *) pyld; struct fdmi_attr_s *attr; + int len; u8 *curr_ptr; - u16 len, count; - u16 templen; + u16 templen, count; + + fcs_hba_attr = kzalloc(sizeof(*fcs_hba_attr), GFP_KERNEL); + if (!fcs_hba_attr) + return -ENOMEM; /* * get hba attributes @@ -2148,6 +2153,9 @@ 
bfa_fcs_lport_fdmi_build_rhba_pyld(struct bfa_fcs_lport_fdmi_s *fdmi, u8 *pyld) len += ((sizeof(attr->type) + sizeof(attr->len)) * count); rhba->hba_attr_blk.attr_count = cpu_to_be32(count); + + kfree(fcs_hba_attr); + return len; } From 80cad525155e2abdf832e95d8635c22e5f844857 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Sep 2021 14:41:40 +0200 Subject: [PATCH 1340/1477] UPSTREAM: firmware: tegra: Reduce stack usage Building the bpmp-debugfs driver for Arm results in a warning for stack usage: drivers/firmware/tegra/bpmp-debugfs.c:321:16: error: stack frame size of 1224 bytes in function 'bpmp_debug_store' [-Werror,-Wframe-larger-than=] static ssize_t bpmp_debug_store(struct file *file, const char __user *buf, It should be possible to rearrange the code to not require two separate buffers for the file name, but the easiest workaround is to use dynamic allocation. Bug: 261962742 Fixes: 5e37b9c137ee ("firmware: tegra: Add support for in-band debug") Link: https://lore.kernel.org/all/20201204193714.3134651-1-arnd@kernel.org/ Signed-off-by: Arnd Bergmann [treding@nvidia.com: consistently return NULL on failure] Signed-off-by: Thierry Reding (cherry picked from commit 06c2d9a078abe784fd9fd0f1534e318e827712b4) Signed-off-by: Lee Jones Change-Id: Ic147141781cc5fd4250a6d09e283c23ba45cbd9a --- drivers/firmware/tegra/bpmp-debugfs.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/tegra/bpmp-debugfs.c b/drivers/firmware/tegra/bpmp-debugfs.c index fad97ec8e81f..286fe1257961 100644 --- a/drivers/firmware/tegra/bpmp-debugfs.c +++ b/drivers/firmware/tegra/bpmp-debugfs.c @@ -74,28 +74,36 @@ static void seqbuf_seek(struct seqbuf *seqbuf, ssize_t offset) static const char *get_filename(struct tegra_bpmp *bpmp, const struct file *file, char *buf, int size) { - char root_path_buf[512]; - const char *root_path; - const char *filename; + const char *root_path, *filename = NULL; + char *root_path_buf; size_t 
root_len; + root_path_buf = kzalloc(512, GFP_KERNEL); + if (!root_path_buf) + goto out; + root_path = dentry_path(bpmp->debugfs_mirror, root_path_buf, sizeof(root_path_buf)); if (IS_ERR(root_path)) - return NULL; + goto out; root_len = strlen(root_path); filename = dentry_path(file->f_path.dentry, buf, size); - if (IS_ERR(filename)) - return NULL; + if (IS_ERR(filename)) { + filename = NULL; + goto out; + } - if (strlen(filename) < root_len || - strncmp(filename, root_path, root_len)) - return NULL; + if (strlen(filename) < root_len || strncmp(filename, root_path, root_len)) { + filename = NULL; + goto out; + } filename += root_len; +out: + kfree(root_path_buf); return filename; } From 9e2b4cc230e7ccb7fdcaf06c09f86fd829e8db5c Mon Sep 17 00:00:00 2001 From: Clement Lecigne Date: Fri, 13 Jan 2023 13:07:45 +0100 Subject: [PATCH 1341/1477] UPSTREAM: ALSA: pcm: Move rwsem lock inside snd_ctl_elem_read to prevent UAF [ Note: this is a fix that works around the bug equivalently as the two upstream commits: 1fa4445f9adf ("ALSA: control - introduce snd_ctl_notify_one() helper") 56b88b50565c ("ALSA: pcm: Move rwsem lock inside snd_ctl_elem_read to prevent UAF") but in a simpler way to fit with older stable trees -- tiwai ] Add missing locking in ctl_elem_read_user/ctl_elem_write_user which can be easily triggered and turned into an use-after-free. Example code paths with SNDRV_CTL_IOCTL_ELEM_READ: 64-bits: snd_ctl_ioctl snd_ctl_elem_read_user [takes controls_rwsem] snd_ctl_elem_read [lock properly held, all good] [drops controls_rwsem] 32-bits (compat): snd_ctl_ioctl_compat snd_ctl_elem_write_read_compat ctl_elem_write_read snd_ctl_elem_read [missing lock, not good] CVE-2023-0266 was assigned for this issue. 
Bug: 265303544 Signed-off-by: Clement Lecigne Cc: stable@kernel.org # 5.12 and older Signed-off-by: Takashi Iwai Reviewed-by: Jaroslav Kysela Signed-off-by: Greg Kroah-Hartman (cherry picked from commit df02234e6b87d2a9a82acd3198e44bdeff8488c7) Signed-off-by: Lee Jones Change-Id: Ibe891cdcb9eaf0dfc7bd771689c85c32b5c0d1f7 --- sound/core/control_compat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index 97467f6a32a1..980ab3580f1b 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -304,7 +304,9 @@ static int ctl_elem_read_user(struct snd_card *card, err = snd_power_wait(card, SNDRV_CTL_POWER_D0); if (err < 0) goto error; + down_read(&card->controls_rwsem); err = snd_ctl_elem_read(card, data); + up_read(&card->controls_rwsem); if (err < 0) goto error; err = copy_ctl_value_to_user(userdata, valuep, data, type, count); @@ -332,7 +334,9 @@ static int ctl_elem_write_user(struct snd_ctl_file *file, err = snd_power_wait(card, SNDRV_CTL_POWER_D0); if (err < 0) goto error; + down_write(&card->controls_rwsem); err = snd_ctl_elem_write(card, file, data); + up_write(&card->controls_rwsem); if (err < 0) goto error; err = copy_ctl_value_to_user(userdata, valuep, data, type, count); From 6bd9415d98f3c44a820c398ed5cbccecce07f331 Mon Sep 17 00:00:00 2001 From: Srinivasarao Pathipati Date: Fri, 27 Jan 2023 11:34:03 +0530 Subject: [PATCH 1342/1477] ANDROID: cpu: correct dl_cpu_busy() calls The patch 0039189a3b15 ("sched/deadline: Merge dl_task_can_attach() and dl_cpu_busy()") which is picked from upstream modifies declaration of function dl_cpu_busy(). But it won't update function usage from android specific code that introduced with patch 683010f555d8 ("ANDROID: cpu/hotplug: add pause/resume_cpus interface"). 
Bug: 266874695 Fixes: 0039189a3b15 ("sched/deadline: Merge dl_task_can_attach() and dl_cpu_busy()" Change-Id: I40c12f912b7fe854b1e2e13f75c727c3c9a2435c Signed-off-by: Srinivasarao Pathipati --- kernel/cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index fc15c01d61b8..9bd53e65246b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1159,7 +1159,7 @@ int remove_cpu(unsigned int cpu) } EXPORT_SYMBOL_GPL(remove_cpu); -extern bool dl_cpu_busy(unsigned int cpu); +extern int dl_cpu_busy(int cpu, struct task_struct *p); int __pause_drain_rq(struct cpumask *cpus) { @@ -1234,7 +1234,7 @@ int pause_cpus(struct cpumask *cpus) cpumask_and(cpus, cpus, cpu_active_mask); for_each_cpu(cpu, cpus) { - if (!cpu_online(cpu) || dl_cpu_busy(cpu) || + if (!cpu_online(cpu) || dl_cpu_busy(cpu, NULL) || get_cpu_device(cpu)->offline_disabled == true) { err = -EBUSY; goto err_cpu_maps_update; From 67884a649c141a44d91400df6ca0a5ef55e1771a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 5 Dec 2022 11:31:25 +0100 Subject: [PATCH 1343/1477] UPSTREAM: arm64: efi: Execute runtime services from a dedicated stack commit ff7a167961d1b97e0e205f245f806e564d3505e7 upstream. With the introduction of PRMT in the ACPI subsystem, the EFI rts workqueue is no longer the only caller of efi_call_virt_pointer() in the kernel. This means the EFI runtime services lock is no longer sufficient to manage concurrent calls into firmware, but also that firmware calls may occur that are not marshalled via the workqueue mechanism, but originate directly from the caller context. For added robustness, and to ensure that the runtime services have 8 KiB of stack space available as per the EFI spec, introduce a spinlock protected EFI runtime stack of 8 KiB, where the spinlock also ensures serialization between the EFI rts workqueue (which itself serializes EFI runtime calls) and other callers of efi_call_virt_pointer(). 
While at it, use the stack pivot to avoid reloading the shadow call stack pointer from the ordinary stack, as doing so could produce a gadget to defeat it. Bug: 260821414 Signed-off-by: Ard Biesheuvel Cc: Lee Jones Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lee Jones Change-Id: Ie961576ae93cafc315cb37fb84cca0a6402eda59 --- arch/arm64/include/asm/efi.h | 3 +++ arch/arm64/kernel/efi-rt-wrapper.S | 13 ++++++++++++- arch/arm64/kernel/efi.c | 27 +++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 973b14415271..16892f0d05ad 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -25,6 +25,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); ({ \ efi_virtmap_load(); \ __efi_fpsimd_begin(); \ + spin_lock(&efi_rt_lock); \ }) #define arch_efi_call_virt(p, f, args...) \ @@ -36,10 +37,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_teardown() \ ({ \ + spin_unlock(&efi_rt_lock); \ __efi_fpsimd_end(); \ efi_virtmap_unload(); \ }) +extern spinlock_t efi_rt_lock; efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 75691a2641c1..b2786b968fee 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -16,6 +16,12 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) */ stp x1, x18, [sp, #16] + ldr_l x16, efi_rt_stack_top + mov sp, x16 +#ifdef CONFIG_SHADOW_CALL_STACK + str x18, [sp, #-16]! 
+#endif + /* * We are lucky enough that no EFI runtime services take more than * 5 arguments, so all are passed in registers rather than via the @@ -29,6 +35,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) mov x4, x6 blr x8 + mov sp, x29 ldp x1, x2, [sp, #16] cmp x2, x18 ldp x29, x30, [sp], #32 @@ -42,6 +49,10 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) * called with preemption disabled and a separate shadow stack is used * for interrupts. */ - mov x18, x2 +#ifdef CONFIG_SHADOW_CALL_STACK + ldr_l x18, efi_rt_stack_top + ldr x18, [x18, #-16] +#endif + b efi_handle_corrupted_x18 // tail call SYM_FUNC_END(__efi_rt_asm_wrapper) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index c5685179db5a..72f432d23ec5 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -143,3 +143,30 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); return s; } + +DEFINE_SPINLOCK(efi_rt_lock); + +asmlinkage u64 *efi_rt_stack_top __ro_after_init; + +/* EFI requires 8 KiB of stack space for runtime services */ +static_assert(THREAD_SIZE >= SZ_8K); + +static int __init arm64_efi_rt_init(void) +{ + void *p; + + if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return 0; + + p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL, + NUMA_NO_NODE, &&l); +l: if (!p) { + pr_warn("Failed to allocate EFI runtime stack\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return -ENOMEM; + } + + efi_rt_stack_top = p + THREAD_SIZE; + return 0; +} +core_initcall(arm64_efi_rt_init); From dd8418a59a40ac8bda0f3a23a8f3fae439d0cc9e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 9 Jan 2023 12:41:46 +0100 Subject: [PATCH 1344/1477] UPSTREAM: efi: rt-wrapper: Add missing include commit 18bba1843fc7f264f58c9345d00827d082f9c558 upstream. Add the missing #include of asm/assembler.h, which is where the ldr_l macro is defined. 
Bug: 260821414 Fixes: ff7a167961d1b97e ("arm64: efi: Execute runtime services from a dedicated stack") Signed-off-by: Ard Biesheuvel Cc: Lee Jones Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lee Jones Change-Id: I50d1e21277ef64dcb1d58d7f1c062dc913cfee74 --- arch/arm64/kernel/efi-rt-wrapper.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index b2786b968fee..2d3c4b02393e 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -4,6 +4,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> SYM_FUNC_START(__efi_rt_asm_wrapper) stp x29, x30, [sp, #-32]! From 5dc06419d8a6717a128ebae57726f7c36544d306 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 17 Aug 2022 11:23:51 -0700 Subject: [PATCH 1345/1477] UPSTREAM: usb: dwc3: Do not service EP0 and conndone events if soft disconnected There are some operations that need to be ignored if there is a soft disconnect in progress. This is to avoid having a pending EP0 transfer in progress while attempting to stop active transfers and halting the controller. There were several instances seen where a soft disconnect was able to occur during early link negotiation, i.e. bus reset/conndone, which leads to the conndone handler re-configuring EPs while attempting to halt the controller, as DEP flags are cleared as part of the soft disconnect path. ep0out: cmd 'Start New Configuration' ep0out: cmd 'Set Endpoint Transfer Resource' ep0in: cmd 'Set Endpoint Transfer Resource' ep1out: cmd 'Set Endpoint Transfer Resource' ... event (00030601): Suspend [U3] event (00000101): Reset [U0] ep0out: req ffffff87e5c9e100 length 0/0 zsI ==> 0 event (00000201): Connection Done [U0] ep0out: cmd 'Start New Configuration' ep0out: cmd 'Set Endpoint Transfer Resource' In addition, if a soft disconnect occurs, EP0 events are still allowed to process, however, it will stall/restart during the SETUP phase.
The host is still able to query for the DATA phase, leading to a xfernotready(DATA) event. Since none of the SETUP transfer parameters are populated, the xfernotready is treated as a "wrong direction" error, leading to a duplicate stall/restart routine. Add the proper softconnect/connected checks in sequences that are potentially involved during soft disconnect processing. Reviewed-by: Thinh Nguyen Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220817182359.13550-2-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 359d5a85a758906087801d7b3d3536a984211dec) Bug: 263189538 Change-Id: Ia933ce7a70febbab8bb122d650691bd937d9ec37 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/ep0.c | 9 +++++++-- drivers/usb/dwc3/gadget.c | 4 ++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 7f01513df0f2..00527a20dd21 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -197,7 +197,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request, int ret; spin_lock_irqsave(&dwc->lock, flags); - if (!dep->endpoint.desc || !dwc->pullups_connected) { + if (!dep->endpoint.desc || !dwc->pullups_connected || !dwc->connected) { dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n", dep->name); ret = -ESHUTDOWN; @@ -814,8 +814,9 @@ static void dwc3_ep0_inspect_setup(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl = (void *) dwc->ep0_trb; int ret = -EINVAL; u32 len; + struct dwc3_vendor *vdwc = container_of(dwc, struct dwc3_vendor, dwc); - if (!dwc->gadget_driver || !dwc->connected) + if (!dwc->gadget_driver || !vdwc->softconnect || !dwc->connected) goto out; trace_dwc3_ctrl_req(ctrl); @@ -1115,8 +1116,12 @@ void dwc3_ep0_end_control_data(struct dwc3 *dwc, struct dwc3_ep *dep) static void dwc3_ep0_xfernotready(struct dwc3 *dwc, const struct dwc3_event_depevt *event) { + struct dwc3_vendor *vdwc = container_of(dwc, struct dwc3_vendor, dwc); + 
switch (event->status) { case DEPEVT_STATUS_CONTROL_DATA: + if (!vdwc->softconnect || !dwc->connected) + return; /* * We already have a DATA transfer in the controller's cache, * if we receive a XferNotReady(DATA) we will ignore it, unless diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 71625ddba11a..d4fdc127a47e 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3877,6 +3877,10 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc) u32 reg; u8 lanes = 1; u8 speed; + struct dwc3_vendor *vdwc = container_of(dwc, struct dwc3_vendor, dwc); + + if (!vdwc->softconnect) + return; reg = dwc3_readl(dwc->regs, DWC3_DSTS); speed = reg & DWC3_DSTS_CONNECTSPD; From 35cb147c38aaa5fb33eff2a42402ab601cfcb084 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 17 Aug 2022 11:23:52 -0700 Subject: [PATCH 1346/1477] UPSTREAM: usb: dwc3: gadget: Force sending delayed status during soft disconnect If any function drivers request for a delayed status phase, this leads to a SETUP transfer timeout error, since the function may take longer to process the DATA stage. This eventually results in end transfer timeouts, as there is a pending SETUP transaction. In addition, allow the DWC3_EP_DELAY_STOP to be set if there is a delayed status requested. Occasionally, a host may abort the current SETUP transaction, by issuing a subsequent SETUP token. In those situations, it would result in an endxfer timeout as well.
Reviewed-by: Thinh Nguyen Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220817182359.13550-3-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit e1ee843488d58099a89979627ef85d5bd6c5cacd) Bug: 263189538 Change-Id: Ifcc747ecfecec7d4329950af1edc035cd55dc96d Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d4fdc127a47e..dd7fcccc2044 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2491,6 +2491,9 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc) if (dwc->ep0state != EP0_SETUP_PHASE) { int ret; + if (dwc->delayed_status) + dwc3_ep0_send_delayed_status(dwc); + reinit_completion(&dwc->ep0_in_setup); spin_unlock_irqrestore(&dwc->lock, flags); @@ -3695,7 +3698,7 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, * timeout. Delay issuing the End Transfer command until the Setup TRB is * prepared. */ - if (dwc->ep0state != EP0_SETUP_PHASE && !dwc->delayed_status) { + if (dwc->ep0state != EP0_SETUP_PHASE) { dep->flags |= DWC3_EP_DELAY_STOP; return; } From 0bbc89c3469b1696bd74f91e2217d74e82fa4bed Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 17 Aug 2022 11:23:53 -0700 Subject: [PATCH 1347/1477] UPSTREAM: usb: dwc3: gadget: Synchronize IRQ between soft connect/disconnect Ensure that there are no pending events being handled in between soft connect/disconnect transitions. As we are keeping interrupts enabled, and EP0 events are still being serviced, this avoids any stale events from being serviced. 
Reviewed-by: Thinh Nguyen Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220817182359.13550-4-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9711c67de7482c81e1daca3548fbc5c9603600e3) Bug: 263189538 Change-Id: I8ec1e0ec2fe2d48f5db26e833fb9456936380e28 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index dd7fcccc2044..04475f9f40bf 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2562,6 +2562,8 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) return 0; } + synchronize_irq(dwc->irq_gadget); + if (!is_on) { ret = dwc3_gadget_soft_disconnect(dwc); } else { From 19803140c06323c156e301732f948a7e0a04c30b Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 17 Aug 2022 11:23:54 -0700 Subject: [PATCH 1348/1477] UPSTREAM: usb: dwc3: gadget: Continue handling EP0 xfercomplete events During soft disconnect, EP0 events are expected to be handled in order to allow the controller to successfully move into the halted state. Since __dwc3_gadget_stop() is executed before polling, EP0 has been disabled, and events are being blocked. Allow xfercomplete events to be handled, so that cached SETUP packets can be read out from the internal controller memory. Without doing so, it will lead to endxfer timeouts, which results to controller halt failures. 
Reviewed-by: Thinh Nguyen Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220817182359.13550-5-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit dff981842a0b1c05786c4c0cdea3ac80079ddd57) Bug: 263189538 Change-Id: Id9cbfacd05a351fda689b6845d2469c5e7e7e51b Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 04475f9f40bf..594fa02b804b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2707,6 +2707,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(512); dep = dwc->eps[0]; + dep->flags = 0; ret = __dwc3_gadget_ep_enable(dep, DWC3_DEPCFG_ACTION_INIT); if (ret) { dev_err(dwc->dev, "failed to enable %s\n", dep->name); @@ -2714,6 +2715,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) } dep = dwc->eps[1]; + dep->flags = 0; ret = __dwc3_gadget_ep_enable(dep, DWC3_DEPCFG_ACTION_INIT); if (ret) { dev_err(dwc->dev, "failed to enable %s\n", dep->name); @@ -3601,11 +3603,12 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc, dep = dwc->eps[epnum]; if (!(dep->flags & DWC3_EP_ENABLED)) { - if (!(dep->flags & DWC3_EP_TRANSFER_STARTED)) + if ((epnum > 1) && !(dep->flags & DWC3_EP_TRANSFER_STARTED)) return; /* Handle only EPCMDCMPLT when EP disabled */ - if (event->endpoint_event != DWC3_DEPEVT_EPCMDCMPLT) + if ((event->endpoint_event != DWC3_DEPEVT_EPCMDCMPLT) && + !(epnum <= 1 && event->endpoint_event == DWC3_DEPEVT_XFERCOMPLETE)) return; } From 4fc39328579e687c18e83134b9ecf882e7555b91 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Thu, 1 Sep 2022 12:36:21 -0700 Subject: [PATCH 1349/1477] UPSTREAM: usb: dwc3: Avoid unmapping USB requests if endxfer is not complete If DWC3_EP_DELAYED_STOP is set during stop active transfers, then do not continue attempting to unmap request buffers during 
dwc3_remove_requests(). This can lead to SMMU faults, as the controller has not stopped the processing of the TRB. Defer this sequence to the EP0 out start, which ensures that there are no pending SETUP transactions before issuing the endxfer. Reviewed-by: Thinh Nguyen Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220901193625.8727-2-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2b2da6574e77ebf83c0df6d8b838bc37764c4bfa) Bug: 263189538 Change-Id: I01ba6d79b56211316bfa492ae4d5d41680067c9e Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/core.h | 1 + drivers/usb/dwc3/ep0.c | 5 ++++- drivers/usb/dwc3/gadget.c | 6 +++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 28df4e8b9359..b6f71aed4f82 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1564,6 +1564,7 @@ int dwc3_send_gadget_generic_command(struct dwc3 *dwc, unsigned int cmd, u32 param); void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt); void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc); +void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep, int status); #else static inline int dwc3_gadget_init(struct dwc3 *dwc) { return 0; } diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 00527a20dd21..fbb7d38ede7d 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -293,7 +293,10 @@ void dwc3_ep0_out_start(struct dwc3 *dwc) continue; dwc3_ep->flags &= ~DWC3_EP_DELAY_STOP; - dwc3_stop_active_transfer(dwc3_ep, true, true); + if (dwc->connected) + dwc3_stop_active_transfer(dwc3_ep, true, true); + else + dwc3_remove_requests(dwc, dwc3_ep, -ESHUTDOWN); } } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 594fa02b804b..7a9b29f52860 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -967,12 +967,16 @@ out: return 0; } -static void dwc3_remove_requests(struct dwc3 
*dwc, struct dwc3_ep *dep, int status) +void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep, int status) { struct dwc3_request *req; dwc3_stop_active_transfer(dep, true, false); + /* If endxfer is delayed, avoid unmapping requests */ + if (dep->flags & DWC3_EP_DELAY_STOP) + return; + /* - giveback all requests to gadget driver */ while (!list_empty(&dep->started_list)) { req = next_request(&dep->started_list); From 4091dff1ffbca714aae6e96ae9503fd44ae83bb9 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Wed, 21 Dec 2022 23:32:01 +0530 Subject: [PATCH 1350/1477] UPSTREAM: usb: dwc3: Remove DWC3 locking during gadget suspend/resume Remove the need for making dwc3_gadget_suspend() and dwc3_gadget_resume() to be called in a spinlock, as dwc3_gadget_run_stop() could potentially take some time to complete. Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220901193625.8727-3-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 5265397f94424eaea596026fd34dc7acf474dcec) Bug: 263189538 Change-Id: Ia28cf2f5dd1929a83812d685eec9a9008f8bc81d Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/core.c | 4 ---- drivers/usb/dwc3/gadget.c | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 40d358ca8b58..4806b415d237 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1756,9 +1756,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) case DWC3_GCTL_PRTCAP_DEVICE: if (pm_runtime_suspended(dwc->dev)) break; - spin_lock_irqsave(&dwc->lock, flags); dwc3_gadget_suspend(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); synchronize_irq(dwc->irq_gadget); dwc3_core_exit(dwc); break; @@ -1819,9 +1817,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) return ret; dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); - spin_lock_irqsave(&dwc->lock, flags); dwc3_gadget_resume(dwc); - 
spin_unlock_irqrestore(&dwc->lock, flags); break; case DWC3_GCTL_PRTCAP_HOST: if (!PMSG_IS_AUTO(msg)) { diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 7a9b29f52860..47ac914fd136 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4539,12 +4539,17 @@ void dwc3_gadget_exit(struct dwc3 *dwc) int dwc3_gadget_suspend(struct dwc3 *dwc) { + unsigned long flags; + if (!dwc->gadget_driver) return 0; dwc3_gadget_run_stop(dwc, false, false); + + spin_lock_irqsave(&dwc->lock, flags); dwc3_disconnect_gadget(dwc); __dwc3_gadget_stop(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); return 0; } From 6b23440751533c037bf1fd5a6e05014e95eb42db Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Thu, 1 Sep 2022 12:36:23 -0700 Subject: [PATCH 1351/1477] UPSTREAM: usb: dwc3: Increase DWC3 controller halt timeout Since EP0 transactions need to be completed before the controller halt sequence is finished, this may take some time depending on the host and the enabled functions. Increase the controller halt timeout, so that we give the controller sufficient time to handle EP0 transfers. 
Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220901193625.8727-4-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 461ee467507cb98a348fa91ff8460908bb0ea423) Bug: 263189538 Change-Id: Idb8998c4ae1d346a63043f40a3915a8a02601982 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 47ac914fd136..6c81a019f8c2 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2434,7 +2434,7 @@ static void __dwc3_gadget_set_speed(struct dwc3 *dwc) static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend) { u32 reg; - u32 timeout = 500; + u32 timeout = 2000; if (pm_runtime_suspended(dwc->dev)) return 0; @@ -2467,6 +2467,7 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend) dwc3_gadget_dctl_write_safe(dwc, reg); do { + usleep_range(1000, 2000); reg = dwc3_readl(dwc->regs, DWC3_DSTS); reg &= DWC3_DSTS_DEVCTRLHLT; } while (--timeout && !(!is_on ^ !reg)); From e3a5b60c60de7d1ff5479932ddd260e13b4890ce Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Thu, 1 Sep 2022 12:36:24 -0700 Subject: [PATCH 1352/1477] UPSTREAM: usb: dwc3: gadget: Skip waiting for CMDACT cleared during endxfer For endxfer commands that do not require an endpoint complete interrupt, avoid having to wait for the command active bit to clear. This allows for EP0 events to continue to be handled, which allows for the controller to complete it. Otherwise, it is known that the endxfer command will fail if there is a pending SETUP token that needs to be read. 
Suggested-by: Thinh Nguyen Reviewed-by: Thinh Nguyen Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220901193625.8727-5-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b353eb6dc285a0775a447f53e5b2a50bf3f9684f) Bug: 263189538 Change-Id: If34b7ddc2e3ea10ec94afe43e6dba0878da6776f Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 6c81a019f8c2..a6ffa60b7de9 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -367,7 +367,9 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, dwc3_writel(dep->regs, DWC3_DEPCMD, cmd); - if (!(cmd & DWC3_DEPCMD_CMDACT)) { + if (!(cmd & DWC3_DEPCMD_CMDACT) || + (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_ENDTRANSFER && + !(cmd & DWC3_DEPCMD_CMDIOC))) { ret = 0; goto skip_status; } From 5f30de1dff27b1384f63a9e0be535ecf8709c102 Mon Sep 17 00:00:00 2001 From: aaro Date: Tue, 31 Jan 2023 13:44:53 +0200 Subject: [PATCH 1353/1477] ANDROID: GKI: Add Tuxera symbol list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This list covers Microsoft exFAT by Tuxera driver. 
Leaf changes summary: 1 artifact changed Changed leaf types summary: 0 leaf type changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 1 Added function Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 0 Added variable 1 Added function: [A] 'function void _trace_android_vh_record_pcpu_rwsem_starttime(task_struct*, unsigned long int)' Bug: 267227900 Change-Id: Ie0189076093a19bf9e8ea146565f9d40bda55435 Signed-off-by: Aaro Mäkinen --- android/abi_gki_aarch64.xml | 139 +++++++++++++++++- android/abi_gki_aarch64_tuxera | 248 +++++++++++++++++++++++++++++++++ build.config.gki.aarch64 | 1 + 3 files changed, 381 insertions(+), 7 deletions(-) create mode 100644 android/abi_gki_aarch64_tuxera diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index e5beb2b22443..5f631576d09c 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -803,6 +803,7 @@ + @@ -13097,6 +13098,7 @@ + @@ -15332,6 +15334,7 @@ + @@ -22298,6 +22301,11 @@ + + + + + @@ -24271,6 +24279,17 @@ + + + + + + + + + + + @@ -45019,7 +45038,11 @@ - + + + + + @@ -50348,6 +50371,7 @@ + @@ -53976,6 +54000,11 @@ + + + + + @@ -66048,6 +66077,14 @@ + + + + + + + + @@ -68542,6 +68579,11 @@ + + + + + @@ -72650,6 +72692,7 @@ + @@ -73321,7 +73364,11 @@ - + + + + + @@ -78069,6 +78116,11 @@ + + + + + @@ -82583,6 +82635,7 @@ + @@ -84504,6 +84557,12 @@ + + + + + + @@ -86324,6 +86383,7 @@ + @@ -88885,6 +88945,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -90402,6 +90509,7 @@ + @@ -91641,6 +91749,7 @@ + @@ -95609,6 +95718,17 @@ + + + + + + + + + + + @@ -118049,10 +118169,10 @@ - - - - + + + + @@ -120634,7 +120754,7 @@ - + @@ -121476,6 +121596,11 @@ + + + + + diff --git a/android/abi_gki_aarch64_tuxera b/android/abi_gki_aarch64_tuxera new file mode 100644 index 000000000000..b14b56603408 --- /dev/null +++ b/android/abi_gki_aarch64_tuxera @@ -0,0 +1,248 @@ +[abi_symbol_list] +add_to_page_cache_locked 
+__alloc_pages_nodemask +__arch_copy_from_user +__arch_copy_to_user +arm64_const_caps_ready +autoremove_wake_function +balance_dirty_pages_ratelimited +bcmp +bdev_read_only +__bforget +bio_add_page +bio_alloc_bioset +bio_associate_blkg +bio_put +__bitmap_weight +bit_waitqueue +blkdev_issue_discard +blkdev_issue_flush +blk_finish_plug +blk_start_plug +__blockdev_direct_IO +block_invalidatepage +block_is_partially_uptodate +__breadahead +__bread_gfp +__brelse +buffer_migrate_page +capable +capable_wrt_inode_uidgid +__cfi_slowpath +__check_object_size +clear_inode +clear_page_dirty_for_io +complete_and_exit +cpu_hwcap_keys +cpu_hwcaps +create_empty_buffers +current_umask +d_add +d_add_ci +d_instantiate +d_make_root +down_read +down_write +dput +drop_nlink +d_splice_alias +dump_stack +end_buffer_read_sync +end_page_writeback +errseq_set +failure_tracking +fiemap_fill_next_extent +fiemap_prep +filemap_fdatawait_range +filemap_fdatawrite +filemap_flush +__filemap_set_wb_err +filemap_write_and_wait_range +file_remove_privs +file_update_time +file_write_and_wait_range +finish_wait +flush_dcache_page +freezing_slow_path +fs_bio_set +generic_error_remove_page +generic_file_direct_write +generic_file_llseek +generic_file_mmap +generic_file_open +generic_file_read_iter +generic_file_splice_read +generic_fillattr +generic_perform_write +generic_read_dir +generic_write_checks +__getblk_gfp +gic_nonsecure_priorities +grab_cache_page_write_begin +iget5_locked +igrab +ihold +ilookup5 +in_group_p +__init_rwsem +init_wait_entry +__init_waitqueue_head +inode_dio_wait +inode_init_once +inode_newsize_ok +inode_set_flags +__insert_inode_hash +invalidate_bdev +invalidate_mapping_pages +io_schedule +iov_iter_advance +iov_iter_alignment +iov_iter_get_pages +iput +is_bad_inode +iter_file_splice_write +iunique +jiffies +jiffies_to_msecs +kasan_flag_enabled +kfree +kill_block_super +__kmalloc +kmalloc_caches +kmem_cache_alloc +kmem_cache_alloc_trace +kmem_cache_create 
+kmem_cache_create_usercopy +kmem_cache_destroy +kmem_cache_free +krealloc +kthread_create_on_node +kthread_should_stop +kthread_stop +ktime_get_coarse_real_ts64 +kvfree +__list_add_valid +__list_del_entry_valid +ll_rw_block +load_nls +load_nls_default +__lock_buffer +__lock_page +lru_cache_add +make_bad_inode +mark_buffer_dirty +mark_buffer_write_io_error +__mark_inode_dirty +mark_page_accessed +memcpy +memmove +memset +mktime64 +mnt_drop_write_file +mnt_want_write_file +module_layout +mount_bdev +mpage_readahead +mpage_readpage +__msecs_to_jiffies +__mutex_init +mutex_lock +mutex_trylock +mutex_unlock +new_inode +notify_change +pagecache_get_page +page_cache_next_miss +page_cache_prev_miss +__page_pinner_migration_failed +pagevec_lookup_range_tag +__pagevec_release +__percpu_down_read +preempt_schedule +preempt_schedule_notrace +prepare_to_wait +prepare_to_wait_event +printk +__put_page +put_pages_list +___ratelimit +_raw_read_lock +_raw_read_lock_irqsave +_raw_read_unlock +_raw_read_unlock_irqrestore +_raw_spin_lock +_raw_spin_lock_irqsave +_raw_spin_unlock +_raw_spin_unlock_irqrestore +_raw_write_lock +_raw_write_lock_irqsave +_raw_write_unlock +_raw_write_unlock_irqrestore +rcuwait_wake_up +readahead_gfp_mask +read_cache_page +redirty_page_for_writepage +__refrigerator +register_filesystem +__remove_inode_hash +sb_min_blocksize +sb_set_blocksize +schedule +schedule_timeout_interruptible +seq_printf +setattr_prepare +set_freezable +set_nlink +set_page_dirty +__set_page_dirty_buffers +__set_page_dirty_nobuffers +set_user_nice +simple_strtol +simple_strtoul +simple_strtoull +sprintf +__stack_chk_fail +__stack_chk_guard +strchr +strcmp +strlen +strncasecmp +strncmp +strsep +strstr +submit_bh +submit_bio +__sync_dirty_buffer +sync_dirty_buffer +sync_filesystem +sync_inode_metadata +system_freezing_cnt +sys_tz +tag_pages_for_writeback +__test_set_page_writeback +time64_to_tm +_trace_android_vh_record_pcpu_rwsem_starttime +truncate_inode_pages 
+truncate_inode_pages_final +truncate_setsize +try_to_writeback_inodes_sb +unload_nls +unlock_buffer +unlock_new_inode +unlock_page +unregister_filesystem +up_read +up_write +vfree +vfs_fsync_range +__vmalloc +vsnprintf +vzalloc +__wait_on_buffer +wait_on_page_bit +wake_bit_function +__wake_up +wake_up_process +__warn_printk +write_inode_now +xa_load diff --git a/build.config.gki.aarch64 b/build.config.gki.aarch64 index 6486341b9aef..738ddee4af33 100644 --- a/build.config.gki.aarch64 +++ b/build.config.gki.aarch64 @@ -32,6 +32,7 @@ android/abi_gki_aarch64_vivo android/abi_gki_aarch64_xiaomi android/abi_gki_aarch64_asus android/abi_gki_aarch64_transsion +android/abi_gki_aarch64_tuxera " FILES="${FILES} From a1806694fcbf922a7fb6e268124734ca86e8db1d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 31 Oct 2022 11:02:45 +0100 Subject: [PATCH 1354/1477] UPSTREAM: media: dvb-core: Fix UAF due to refcount races at releasing commit fd3d91ab1c6ab0628fe642dd570b56302c30a792 upstream. The dvb-core tries to sync the releases of opened files at dvb_dmxdev_release() with two refcounts: dvbdev->users and dvr_dvbdev->users. A problem is present in those two syncs: when yet another dvb_demux_open() is called during those sync waits, dvb_demux_open() continues to process even if the device is being closed. This includes the increment of the former refcount, resulting in the leftover refcount after the sync of the latter refcount at dvb_dmxdev_release(). It ends up with use-after-free, since the function believes that all usages were gone and releases the resources. This patch addresses the problem by adding the check of dmxdev->exit flag at dvb_demux_open(), just like dvb_dvr_open() already does. With the exit flag check, the second call of dvb_demux_open() fails, hence the further corruption can be avoided. 
Also for avoiding the races of the dmxdev->exit flag reference, this patch serializes the dmxdev->exit set up and the sync waits with the dmxdev->mutex lock at dvb_dmxdev_release(). Without the mutex lock, dvb_demux_open() (or dvb_dvr_open()) may run concurrently with dvb_dmxdev_release(), which allows to skip the exit flag check and continue the open process that is being closed. CVE-2022-41218 is assigned to those bugs above. Bug: 248356119 Reported-by: Hyunwoo Kim Cc: Link: https://lore.kernel.org/20220908132754.30532-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lee Jones Change-Id: I5a0dd4444ca25d07a050dcb07609c7c7ccf658af --- drivers/media/dvb-core/dmxdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index e58cb8434daf..12b7f698f562 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -800,6 +800,11 @@ static int dvb_demux_open(struct inode *inode, struct file *file) if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; + if (dmxdev->exit) { + mutex_unlock(&dmxdev->mutex); + return -ENODEV; + } + for (i = 0; i < dmxdev->filternum; i++) if (dmxdev->filter[i].state == DMXDEV_STATE_FREE) break; @@ -1458,7 +1463,10 @@ EXPORT_SYMBOL(dvb_dmxdev_init); void dvb_dmxdev_release(struct dmxdev *dmxdev) { + mutex_lock(&dmxdev->mutex); dmxdev->exit = 1; + mutex_unlock(&dmxdev->mutex); + if (dmxdev->dvbdev->users > 1) { wait_event(dmxdev->dvbdev->wait_queue, dmxdev->dvbdev->users == 1); From f069ba2b3db07a07491299ebbd19ee33475d5609 Mon Sep 17 00:00:00 2001 From: Yunfei Wang Date: Mon, 16 Jan 2023 14:20:03 +0800 Subject: [PATCH 1355/1477] UPSTREAM: iommu/iova: Fix alloc iova overflows issue In __alloc_and_insert_iova_range, there is an issue that retry_pfn overflows. 
The value of iovad->anchor.pfn_hi is ~0UL, then when iovad->cached_node is iovad->anchor, curr_iova->pfn_hi + 1 will overflow. As a result, if the retry logic is executed, low_pfn is updated to 0, and then new_pfn < low_pfn returns false to make the allocation successful. This issue occurs in the following two situations: 1. The first iova size exceeds the domain size. When initializing iova domain, iovad->cached_node is assigned as iovad->anchor. For example, the iova domain size is 10M, start_pfn is 0x1_F000_0000, and the iova size allocated for the first time is 11M. The following is the log information, new->pfn_lo is smaller than iovad->cached_node. Example log as follows: [ 223.798112][T1705487] sh: [name:iova&]__alloc_and_insert_iova_range start_pfn:0x1f0000,retry_pfn:0x0,size:0xb00,limit_pfn:0x1f0a00 [ 223.799590][T1705487] sh: [name:iova&]__alloc_and_insert_iova_range success start_pfn:0x1f0000,new->pfn_lo:0x1efe00,new->pfn_hi:0x1f08ff 2. The node with the largest iova->pfn_lo value in the iova domain is deleted, iovad->cached_node will be updated to iovad->anchor, and then the alloc iova size exceeds the maximum iova size that can be allocated in the domain. After judging that retry_pfn is less than limit_pfn, call retry_pfn+1 to fix the overflow issue. 
Signed-off-by: jianjiao zeng Signed-off-by: Yunfei Wang Cc: # 5.15.* Fixes: 4e89dce72521 ("iommu/iova: Retry from last rb tree node if iova search fails") Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20230111063801.25107-1-yf.wang@mediatek.com Signed-off-by: Joerg Roedel Bug: 265596669 (cherry picked from commit dcdb3ba7e2a8caae7bfefd603bc22fd0ce9a389c) Change-Id: I810ad7023267c7c276961eefbceb652439799c30 [Yunfei: Resolved minor conflict for variable low_pfn_new] Signed-off-by: Yunfei Wang --- drivers/iommu/iova.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 53994947ecf7..95c9f79b8c1f 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -218,7 +218,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, curr = __get_cached_rbnode(iovad, limit_pfn); curr_iova = rb_entry(curr, struct iova, node); - low_pfn_new = curr_iova->pfn_hi + 1; + low_pfn_new = curr_iova->pfn_hi; retry: do { @@ -232,7 +232,7 @@ retry: if (high_pfn < size || new_pfn < low_pfn) { if (low_pfn == iovad->start_pfn && low_pfn_new < limit_pfn) { high_pfn = limit_pfn; - low_pfn = low_pfn_new; + low_pfn = low_pfn_new + 1; curr = &iovad->anchor.node; curr_iova = rb_entry(curr, struct iova, node); goto retry; From 246a996565762c80c0fdc2a196c028f995caa182 Mon Sep 17 00:00:00 2001 From: Sungjong Seo Date: Tue, 24 May 2022 10:29:11 +0900 Subject: [PATCH 1356/1477] BACKPORT: f2fs: allow compression for mmap files in compress_mode=user Since commit e3c548323d32 ("f2fs: let's allow compression for mmap files"), it has been allowed to compress mmap files. However, in compress_mode=user, it is not allowed yet. To keep the same concept in both compress_modes, f2fs_ioc_(de)compress_file() should also allow it. Let's remove checking mmap files in f2fs_ioc_(de)compress_file() so that the compression for mmap files is also allowed in compress_mode=user. 
Bug: 228919347 Bug: 267580491 Signed-off-by: Sungjong Seo Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim (cherry picked from commit 77142c3cf71f09dcf3fb2ef43926a29e2cfd2ae3) Change-Id: I31798dfb851be6ee7e5156be8c46fe007583ee55 (cherry picked from commit d85467fdca7c61be37cfd65434dffba3fb4d978d) --- fs/f2fs/file.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ef49b3684c0c..eb85dcad108c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4121,11 +4121,6 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) goto out; } - if (f2fs_is_mmap_file(inode)) { - ret = -EBUSY; - goto out; - } - ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) goto out; @@ -4193,11 +4188,6 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) goto out; } - if (f2fs_is_mmap_file(inode)) { - ret = -EBUSY; - goto out; - } - ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) goto out; From ce72626280e65048aca5a0ad22cb3b769f85f894 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 20 Jun 2022 10:38:42 -0700 Subject: [PATCH 1357/1477] BACKPORT: f2fs: introduce memory mode Introduce memory mode to support "normal" and "low" memory modes. "low" mode is to support low memory devices. Because of the nature of low memory devices, in this mode, f2fs will try to save memory sometimes by sacrificing performance. "normal" mode is the default mode and same as before.
Bug: 232003054 Bug: 267580491 Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim (cherry picked from commit 60f60d1fd86a) Change-Id: I7cb719b18f0002d7af47f7a18e8ec2f4c534bdd9 --- Documentation/filesystems/f2fs.rst | 5 +++++ fs/f2fs/f2fs.h | 11 +++++++++++ fs/f2fs/super.c | 25 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index f122a2b7c7d1..532c59becb15 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -300,6 +300,11 @@ inlinecrypt When possible, encrypt/decrypt the contents of encrypted Documentation/block/inline-encryption.rst. atgc Enable age-threshold garbage collection, it provides high effectiveness and efficiency on background GC. +memory=%s Control memory mode. This supports "normal" and "low" modes. + "low" mode is introduced to support low memory devices. + Because of the nature of low memory devices, in this mode, f2fs + will try to save memory sometimes by sacrificing performance. + "normal" mode is the default mode and same as before. age_extent_cache Enable an age extent cache based on rb-tree. 
It records data block update frequency of the extent per inode, in order to provide better temperature hints for data block diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 86abbb1a6f1d..1a1d164a4e24 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -152,6 +152,7 @@ struct f2fs_mount_info { int fsync_mode; /* fsync policy */ int fs_mode; /* fs mode: LFS or ADAPTIVE */ int bggc_mode; /* bggc mode: off, on or sync */ + int memory_mode; /* memory mode */ struct fscrypt_dummy_policy dummy_enc_policy; /* test dummy encryption */ block_t unusable_cap_perc; /* percentage for cap */ block_t unusable_cap; /* Amount of space allowed to be @@ -1331,6 +1332,11 @@ enum { */ }; +enum { + MEMORY_MODE_NORMAL, /* memory mode for normal devices */ + MEMORY_MODE_LOW, /* memory mode for low memry devices */ +}; + static inline int f2fs_test_bit(unsigned int nr, char *addr); static inline void f2fs_set_bit(unsigned int nr, char *addr); static inline void f2fs_clear_bit(unsigned int nr, char *addr); @@ -4403,6 +4409,11 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } +static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) +{ + return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; +} + static inline bool f2fs_may_compress(struct inode *inode) { if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) || diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9650528d8f65..75340cdaf9d8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -154,6 +154,7 @@ enum { Opt_atgc, Opt_gc_merge, Opt_nogc_merge, + Opt_memory_mode, Opt_age_extent_cache, Opt_err, }; @@ -230,6 +231,7 @@ static match_table_t f2fs_tokens = { {Opt_atgc, "atgc"}, {Opt_gc_merge, "gc_merge"}, {Opt_nogc_merge, "nogc_merge"}, + {Opt_memory_mode, "memory=%s"}, {Opt_age_extent_cache, "age_extent_cache"}, {Opt_err, NULL}, }; @@ -1153,6 +1155,22 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_age_extent_cache: 
set_opt(sbi, AGE_EXTENT_CACHE); break; + case Opt_memory_mode: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (!strcmp(name, "normal")) { + F2FS_OPTION(sbi).memory_mode = + MEMORY_MODE_NORMAL; + } else if (!strcmp(name, "low")) { + F2FS_OPTION(sbi).memory_mode = + MEMORY_MODE_LOW; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1901,6 +1919,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(sbi, ATGC)) seq_puts(seq, ",atgc"); + + if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_NORMAL) + seq_printf(seq, ",memory=%s", "normal"); + else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW) + seq_printf(seq, ",memory=%s", "low"); + return 0; } @@ -1923,6 +1947,7 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).compress_ext_cnt = 0; F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; + F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL; sbi->sb->s_flags &= ~SB_INLINECRYPT; From 16996773d6de4939f6b7c85be9f97ad96265a67a Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Tue, 2 Aug 2022 12:24:37 -0700 Subject: [PATCH 1358/1477] BACKPORT: f2fs: handle decompress only post processing in softirq Now decompression is being handled in workqueue and it makes read I/O latency non-deterministic, because of the non-deterministic scheduling nature of workqueues. So, I made it handled in softirq context only if possible, not in low memory devices, since this modification will maintain decompression-related memory a little longer.
Bug: 232003054 Bug: 267580491 Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Change-Id: I1a7c642e05c2f8544d475039b733403181de641e (cherry picked from commit 9ef8cd45d7a9) --- fs/f2fs/compress.c | 199 ++++++++++++++++++++++++++++++--------------- fs/f2fs/data.c | 52 ++++++++---- fs/f2fs/f2fs.h | 17 ++-- 3 files changed, 177 insertions(+), 91 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 3116b38c2f6b..c448eaa08cf2 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -737,14 +737,19 @@ out: return ret; } -void f2fs_decompress_cluster(struct decompress_io_ctx *dic) +static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, + bool pre_alloc); +static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, + bool bypass_destroy_callback, bool pre_alloc); + +void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) { struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); struct f2fs_inode_info *fi = F2FS_I(dic->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; + bool bypass_callback = false; int ret; - int i; trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx, dic->cluster_size, fi->i_compress_algorithm); @@ -754,41 +759,10 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic) goto out_end_io; } - dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); - if (!dic->tpages) { - ret = -ENOMEM; - goto out_end_io; - } - - for (i = 0; i < dic->cluster_size; i++) { - if (dic->rpages[i]) { - dic->tpages[i] = dic->rpages[i]; - continue; - } - - dic->tpages[i] = f2fs_compress_alloc_page(); - if (!dic->tpages[i]) { - ret = -ENOMEM; - goto out_end_io; - } - } - - if (cops->init_decompress_ctx) { - ret = cops->init_decompress_ctx(dic); - if (ret) - goto out_end_io; - } - - dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size); - if (!dic->rbuf) { - ret = -ENOMEM; - goto out_destroy_decompress_ctx; - } - - dic->cbuf = f2fs_vmap(dic->cpages, 
dic->nr_cpages); - if (!dic->cbuf) { - ret = -ENOMEM; - goto out_vunmap_rbuf; + ret = f2fs_prepare_decomp_mem(dic, false); + if (ret) { + bypass_callback = true; + goto out_release; } dic->clen = le32_to_cpu(dic->cbuf->clen); @@ -796,7 +770,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic) if (dic->clen > PAGE_SIZE * dic->nr_cpages - COMPRESS_HEADER_SIZE) { ret = -EFSCORRUPTED; - goto out_vunmap_cbuf; + goto out_release; } ret = cops->decompress_pages(dic); @@ -817,17 +791,13 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic) } } -out_vunmap_cbuf: - vm_unmap_ram(dic->cbuf, dic->nr_cpages); -out_vunmap_rbuf: - vm_unmap_ram(dic->rbuf, dic->cluster_size); -out_destroy_decompress_ctx: - if (cops->destroy_decompress_ctx) - cops->destroy_decompress_ctx(dic); +out_release: + f2fs_release_decomp_mem(dic, bypass_callback, false); + out_end_io: trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx, dic->clen, ret); - f2fs_decompress_end_io(dic, ret); + f2fs_decompress_end_io(dic, ret, in_task); } /* @@ -837,7 +807,7 @@ out_end_io: * (or in the case of a failure, cleans up without actually decompressing). 
*/ void f2fs_end_read_compressed_page(struct page *page, bool failed, - block_t blkaddr) + block_t blkaddr, bool in_task) { struct decompress_io_ctx *dic = (struct decompress_io_ctx *)page_private(page); @@ -847,12 +817,12 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed, if (failed) WRITE_ONCE(dic->failed, true); - else if (blkaddr) + else if (blkaddr && in_task) f2fs_cache_compressed_page(sbi, page, dic->inode->i_ino, blkaddr); if (atomic_dec_and_test(&dic->remaining_pages)) - f2fs_decompress_cluster(dic); + f2fs_decompress_cluster(dic, in_task); } static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index) @@ -1492,13 +1462,82 @@ destroy_out: return err; } -static void f2fs_free_dic(struct decompress_io_ctx *dic); +static inline bool allow_memalloc_for_decomp(struct f2fs_sb_info *sbi, + bool pre_alloc) +{ + return pre_alloc ^ f2fs_low_mem_mode(sbi); +} + +static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, + bool pre_alloc) +{ + const struct f2fs_compress_ops *cops = + f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + int i; + + if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + return 0; + + dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); + if (!dic->tpages) + return -ENOMEM; + + for (i = 0; i < dic->cluster_size; i++) { + if (dic->rpages[i]) { + dic->tpages[i] = dic->rpages[i]; + continue; + } + + dic->tpages[i] = f2fs_compress_alloc_page(); + if (!dic->tpages[i]) + return -ENOMEM; + } + + dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size); + if (!dic->rbuf) + return -ENOMEM; + + dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages); + if (!dic->cbuf) + return -ENOMEM; + + if (cops->init_decompress_ctx) { + int ret = cops->init_decompress_ctx(dic); + + if (ret) + return ret; + } + + return 0; +} + +static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, + bool bypass_destroy_callback, bool pre_alloc) +{ + const struct f2fs_compress_ops *cops = + 
f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + + if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + return; + + if (!bypass_destroy_callback && cops->destroy_decompress_ctx) + cops->destroy_decompress_ctx(dic); + + if (dic->cbuf) + vm_unmap_ram(dic->cbuf, dic->nr_cpages); + + if (dic->rbuf) + vm_unmap_ram(dic->rbuf, dic->cluster_size); +} + +static void f2fs_free_dic(struct decompress_io_ctx *dic, + bool bypass_destroy_callback); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) { struct decompress_io_ctx *dic; pgoff_t start_idx = start_idx_of_cluster(cc); - int i; + int i, ret; dic = kmem_cache_zalloc(dic_entry_slab, GFP_NOFS); if (!dic) @@ -1526,32 +1565,43 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->nr_rpages = cc->cluster_size; dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages); - if (!dic->cpages) + if (!dic->cpages) { + ret = -ENOMEM; goto out_free; + } for (i = 0; i < dic->nr_cpages; i++) { struct page *page; page = f2fs_compress_alloc_page(); - if (!page) + if (!page) { + ret = -ENOMEM; goto out_free; + } f2fs_set_compressed_page(page, cc->inode, start_idx + i + 1, dic); dic->cpages[i] = page; } + ret = f2fs_prepare_decomp_mem(dic, true); + if (ret) + goto out_free; + return dic; out_free: - f2fs_free_dic(dic); - return ERR_PTR(-ENOMEM); + f2fs_free_dic(dic, true); + return ERR_PTR(ret); } -static void f2fs_free_dic(struct decompress_io_ctx *dic) +static void f2fs_free_dic(struct decompress_io_ctx *dic, + bool bypass_destroy_callback) { int i; + f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); + if (dic->tpages) { for (i = 0; i < dic->cluster_size; i++) { if (dic->rpages[i]) @@ -1576,17 +1626,33 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic) kmem_cache_free(dic_entry_slab, dic); } -static void f2fs_put_dic(struct decompress_io_ctx *dic) +static void f2fs_late_free_dic(struct work_struct *work) { - if (refcount_dec_and_test(&dic->refcnt)) - 
f2fs_free_dic(dic); + struct decompress_io_ctx *dic = + container_of(work, struct decompress_io_ctx, free_work); + + f2fs_free_dic(dic, false); +} + +static void f2fs_put_dic(struct decompress_io_ctx *dic, bool in_task) +{ + if (refcount_dec_and_test(&dic->refcnt)) { + if (in_task) { + f2fs_free_dic(dic, false); + } else { + INIT_WORK(&dic->free_work, f2fs_late_free_dic); + queue_work(F2FS_I_SB(dic->inode)->post_read_wq, + &dic->free_work); + } + } } /* * Update and unlock the cluster's pagecache pages, and release the reference to * the decompress_io_ctx that was being held for I/O completion. */ -static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) +static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, + bool in_task) { int i; @@ -1607,7 +1673,7 @@ static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) unlock_page(rpage); } - f2fs_put_dic(dic); + f2fs_put_dic(dic, in_task); } static void f2fs_verify_cluster(struct work_struct *work) @@ -1624,14 +1690,15 @@ static void f2fs_verify_cluster(struct work_struct *work) SetPageError(rpage); } - __f2fs_decompress_end_io(dic, false); + __f2fs_decompress_end_io(dic, false, true); } /* * This is called when a compressed cluster has been decompressed * (or failed to be read and/or decompressed). 
*/ -void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, + bool in_task) { if (!failed && dic->need_verity) { /* @@ -1643,7 +1710,7 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) INIT_WORK(&dic->verity_work, f2fs_verify_cluster); fsverity_enqueue_verify_work(&dic->verity_work); } else { - __f2fs_decompress_end_io(dic, failed); + __f2fs_decompress_end_io(dic, failed, in_task); } } @@ -1652,12 +1719,12 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) * * This is called when the page is no longer needed and can be freed. */ -void f2fs_put_page_dic(struct page *page) +void f2fs_put_page_dic(struct page *page, bool in_task) { struct decompress_io_ctx *dic = (struct decompress_io_ctx *)page_private(page); - f2fs_put_dic(dic); + f2fs_put_dic(dic, in_task); } /* diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index bc56bdd3e38b..17ead29e21ab 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -119,7 +119,7 @@ struct bio_post_read_ctx { unsigned int enabled_steps; }; -static void f2fs_finish_read_bio(struct bio *bio) +static void f2fs_finish_read_bio(struct bio *bio, bool in_task) { struct bio_vec *bv; struct bvec_iter_all iter_all; @@ -133,8 +133,9 @@ static void f2fs_finish_read_bio(struct bio *bio) if (f2fs_is_compressed_page(page)) { if (bio->bi_status) - f2fs_end_read_compressed_page(page, true, 0); - f2fs_put_page_dic(page); + f2fs_end_read_compressed_page(page, true, 0, + in_task); + f2fs_put_page_dic(page, in_task); continue; } @@ -191,7 +192,7 @@ static void f2fs_verify_bio(struct work_struct *work) fsverity_verify_bio(bio); } - f2fs_finish_read_bio(bio); + f2fs_finish_read_bio(bio, true); } /* @@ -203,7 +204,7 @@ static void f2fs_verify_bio(struct work_struct *work) * can involve reading verity metadata pages from the file, and these verity * metadata pages may be encrypted and/or compressed. 
*/ -static void f2fs_verify_and_finish_bio(struct bio *bio) +static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task) { struct bio_post_read_ctx *ctx = bio->bi_private; @@ -211,7 +212,7 @@ static void f2fs_verify_and_finish_bio(struct bio *bio) INIT_WORK(&ctx->work, f2fs_verify_bio); fsverity_enqueue_verify_work(&ctx->work); } else { - f2fs_finish_read_bio(bio); + f2fs_finish_read_bio(bio, in_task); } } @@ -224,7 +225,8 @@ static void f2fs_verify_and_finish_bio(struct bio *bio) * that the bio includes at least one compressed page. The actual decompression * is done on a per-cluster basis, not a per-bio basis. */ -static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx) +static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, + bool in_task) { struct bio_vec *bv; struct bvec_iter_all iter_all; @@ -237,7 +239,7 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx) /* PG_error was set if decryption failed. */ if (f2fs_is_compressed_page(page)) f2fs_end_read_compressed_page(page, PageError(page), - blkaddr); + blkaddr, in_task); else all_compressed = false; @@ -262,15 +264,16 @@ static void f2fs_post_read_work(struct work_struct *work) fscrypt_decrypt_bio(ctx->bio); if (ctx->enabled_steps & STEP_DECOMPRESS) - f2fs_handle_step_decompress(ctx); + f2fs_handle_step_decompress(ctx, true); - f2fs_verify_and_finish_bio(ctx->bio); + f2fs_verify_and_finish_bio(ctx->bio, true); } static void f2fs_read_end_io(struct bio *bio) { struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); struct bio_post_read_ctx *ctx = bio->bi_private; + bool intask = in_task(); if (time_to_inject(sbi, FAULT_READ_IO)) { f2fs_show_injection_info(sbi, FAULT_READ_IO); @@ -278,16 +281,29 @@ static void f2fs_read_end_io(struct bio *bio) } if (bio->bi_status) { - f2fs_finish_read_bio(bio); + f2fs_finish_read_bio(bio, intask); return; } - if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) { - INIT_WORK(&ctx->work, 
f2fs_post_read_work); - queue_work(ctx->sbi->post_read_wq, &ctx->work); - } else { - f2fs_verify_and_finish_bio(bio); + if (ctx) { + unsigned int enabled_steps = ctx->enabled_steps & + (STEP_DECRYPT | STEP_DECOMPRESS); + + /* + * If we have only decompression step between decompression and + * decrypt, we don't need post processing for this. + */ + if (enabled_steps == STEP_DECOMPRESS && + !f2fs_low_mem_mode(sbi)) { + f2fs_handle_step_decompress(ctx, intask); + } else if (enabled_steps) { + INIT_WORK(&ctx->work, f2fs_post_read_work); + queue_work(ctx->sbi->post_read_wq, &ctx->work); + return; + } } + + f2fs_verify_and_finish_bio(bio, intask); } static void f2fs_write_end_io(struct bio *bio) @@ -2249,7 +2265,7 @@ skip_reading_dnode: if (f2fs_load_compressed_page(sbi, page, blkaddr)) { if (atomic_dec_and_test(&dic->remaining_pages)) - f2fs_decompress_cluster(dic); + f2fs_decompress_cluster(dic, true); continue; } @@ -2267,7 +2283,7 @@ submit_and_realloc: page->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - f2fs_decompress_end_io(dic, ret); + f2fs_decompress_end_io(dic, ret, true); f2fs_put_dnode(&dn); *bio_ret = NULL; return ret; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1a1d164a4e24..8a7d1802a473 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1556,6 +1556,7 @@ struct decompress_io_ctx { void *private; /* payload buffer for specified decompression algorithm */ void *private2; /* extra payload buffer */ struct work_struct verity_work; /* work to verify the decompressed pages */ + struct work_struct free_work; /* work for late free this structure itself */ }; #define NULL_CLUSTER ((unsigned int)(~0)) @@ -4187,9 +4188,9 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page); bool f2fs_is_compress_backend_ready(struct inode *inode); int f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); -void f2fs_decompress_cluster(struct decompress_io_ctx *dic); +void f2fs_decompress_cluster(struct decompress_io_ctx 
*dic, bool in_task); void f2fs_end_read_compressed_page(struct page *page, bool failed, - block_t blkaddr); + block_t blkaddr, bool in_task); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); @@ -4205,8 +4206,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, bool is_readahead, bool for_write); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); -void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed); -void f2fs_put_page_dic(struct page *page); +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, + bool in_task); +void f2fs_put_page_dic(struct page *page, bool in_task); unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn); int f2fs_init_compress_ctx(struct compress_ctx *cc); void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); @@ -4252,13 +4254,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page) } static inline int f2fs_init_compress_mempool(void) { return 0; } static inline void f2fs_destroy_compress_mempool(void) { } -static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic) { } +static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic, + bool in_task) { } static inline void f2fs_end_read_compressed_page(struct page *page, - bool failed, block_t blkaddr) + bool failed, block_t blkaddr, bool in_task) { WARN_ON_ONCE(1); } -static inline void f2fs_put_page_dic(struct page *page) +static inline void f2fs_put_page_dic(struct page *page, bool in_task) { WARN_ON_ONCE(1); } From a20fd832a4cc10940a9cc61a28a03f57a4342e6c Mon Sep 17 00:00:00 2001 From: Jaewook Kim Date: Wed, 3 Aug 2022 17:53:58 +0900 Subject: [PATCH 1359/1477] BACKPORT: f2fs: do not allow to decompress files have FI_COMPRESS_RELEASED If 
a file has FI_COMPRESS_RELEASED, all writes for it should not be allowed. However, as of now, in case of compress_mode=user, writes triggered by IOCTLs like F2FS_IOC_DE/COMPRESS_FILE are allowed unexpectedly, which could crash that file. To fix it, let's not allow F2FS_IOC_DE/COMPRESS_IOCTL if a file already has FI_COMPRESS_RELEASED flag. This is the reproduction process: 1. $ touch ./file 2. $ chattr +c ./file 3. $ dd if=/dev/random of=./file bs=4096 count=30 conv=notrunc 4. $ dd if=/dev/zero of=./file bs=4096 count=34 seek=30 conv=notrunc 5. $ sync 6. $ do_compress ./file ; call F2FS_IOC_COMPRESS_FILE 7. $ get_compr_blocks ./file ; call F2FS_IOC_GET_COMPRESS_BLOCKS 8. $ release ./file ; call F2FS_IOC_RELEASE_COMPRESS_BLOCKS 9. $ do_compress ./file ; call F2FS_IOC_COMPRESS_FILE again 10. $ get_compr_blocks ./file ; call F2FS_IOC_GET_COMPRESS_BLOCKS again This reproduction process is tested in 128kb cluster size. You can find compr_blocks has a negative value. Fixes: 5fdb322ff2c2b ("f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE") Bug: 228919347 Bug: 267580491 Signed-off-by: Junbeom Yeom Signed-off-by: Sungjong Seo Signed-off-by: Youngjin Gil Signed-off-by: Jaewook Kim Signed-off-by: Jaegeuk Kim (cherry picked from commit 854f8871ed48229d217edd7cb023f749e4a0817b) Change-Id: I086e4d88da68574ef793740d19ecbb2d4ad6ef1d --- fs/f2fs/file.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index eb85dcad108c..074cd1e83f5f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4121,6 +4121,11 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) goto out; } + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + ret = -EINVAL; + goto out; + } + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) goto out; @@ -4188,6 +4193,11 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) goto out; } + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { +
ret = -EINVAL; + goto out; + } + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) goto out; From 9e620f2b545cd2bbeefb40e8ee0b71443e8c9de3 Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Tue, 6 Dec 2022 15:13:01 +0100 Subject: [PATCH 1360/1477] UPSTREAM: usb: gadget: uvc: Prevent buffer overflow in setup handler commit 4c92670b16727365699fe4b19ed32013bab2c107 upstream. Setup function uvc_function_setup permits control transfer requests with up to 64 bytes of payload (UVC_MAX_REQUEST_SIZE), data stage handler for OUT transfer uses memcpy to copy req->actual bytes to uvc_event->data.data array of size 60. This may result in an overflow of 4 bytes. Fixes: cdda479f15cd ("USB gadget: video class function driver") Cc: stable Reviewed-by: Laurent Pinchart Reviewed-by: Daniel Scally Change-Id: I397d2362464e0ffa57950871a956ba1529b86dd1 Signed-off-by: Szymon Heidrich Link: https://lore.kernel.org/r/20221206141301.51305-1-szymon.heidrich@gmail.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c79538f32df12887f110dcd6b9c825b482905f24) Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uvc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 1fc00cce83fb..ba13d8997db8 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -213,8 +213,9 @@ uvc_function_ep0_complete(struct usb_ep *ep, struct usb_request *req) memset(&v4l2_event, 0, sizeof(v4l2_event)); v4l2_event.type = UVC_EVENT_DATA; - uvc_event->data.length = req->actual; - memcpy(&uvc_event->data.data, req->buf, req->actual); + uvc_event->data.length = min_t(unsigned int, req->actual, + sizeof(uvc_event->data.data)); + memcpy(&uvc_event->data.data, req->buf, uvc_event->data.length); v4l2_event_queue(&uvc->vdev, &v4l2_event); } } From 34d4848ba391419bccb56ed13178c67adf3b8edf Mon Sep 17 00:00:00 2001 From: Duke Xin Date: Sat, 19 
Nov 2022 17:44:47 +0800 Subject: [PATCH 1361/1477] UPSTREAM: USB: serial: option: add Quectel EM05-G modem commit f0052d7a1edb3d8921b4e154aa8c46c4845b3714 upstream. The EM05-G modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "RMNET" : AT + DIAG + NMEA + Modem + QMI "MBIM" : MBIM + AT + DIAG + NMEA + Modem The detailed description of the USB configuration for each mode as follows: RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0311 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) 
MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0311 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) 
MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Change-Id: I752f10756ec16da0fedd3f9f0726bec460cbdcb9 Signed-off-by: Duke Xin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 398215f78338e79a2b6352896c902c9461b435f1) Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 537ef276c78f..5636b8f52216 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -255,6 +255,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM060K 0x030b +#define QUECTEL_PRODUCT_EM05G_SG 0x0311 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 @@ -1160,6 +1161,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), .driver_info = RSVD(6) | ZLP }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) }, From 190b01ac50d63f56f47ee44484df3e5f6aff9d13 Mon Sep 17 00:00:00 2001 From: Bruno Thomsen Date: Sun, 27 Nov 2022 18:08:11 +0100 Subject: [PATCH 1362/1477] UPSTREAM: USB: serial: cp210x: add Kamstrup RF sniffer PIDs 
commit e88906b169ebcb8046e8f0ad76edd09ab41cfdfe upstream. The RF sniffers are based on cp210x where the RF frontends are based on a different USB stack. RF sniffers can analyze packets meta data including power level and perform packet injection. Can be used to perform RF frontend self-test when connected to a concentrator, ex. arch/arm/boot/dts/imx7d-flex-concentrator.dts Change-Id: Ie15ef418664bb91bb643297696bd3a1b6213eb95 Signed-off-by: Bruno Thomsen Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9895ce5ea283384bc9374dc1b218f1811b16c4aa) Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 6b5ba6180c30..8a4a0d4dbc13 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -199,6 +199,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */ { USB_DEVICE(0x17A8, 0x0001) }, /* Kamstrup Optical Eye/3-wire */ { USB_DEVICE(0x17A8, 0x0005) }, /* Kamstrup M-Bus Master MultiPort 250D */ + { USB_DEVICE(0x17A8, 0x0011) }, /* Kamstrup 444 MHz RF sniffer */ + { USB_DEVICE(0x17A8, 0x0013) }, /* Kamstrup 870 MHz RF sniffer */ { USB_DEVICE(0x17A8, 0x0101) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Int Ant) */ { USB_DEVICE(0x17A8, 0x0102) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Ext Ant) */ { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ From d81b6e6e8842a0c4a90c2ca62e6e56eb335e63e2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Nov 2022 15:17:49 +0100 Subject: [PATCH 1363/1477] UPSTREAM: USB: serial: f81232: fix division by zero on line-speed change commit a08ca6ebafe615c9028c53fc4c9e6c9b2b1f2888 upstream. The driver leaves the line speed unchanged in case a requested speed is not supported. 
Make sure to handle the case where the current speed is B0 (hangup) without dividing by zero when determining the clock source. Fixes: 268ddb5e9b62 ("USB: serial: f81232: add high baud rate support") Cc: stable@vger.kernel.org # 5.2 Cc: Ji-Ze Hong (Peter Hong) Reviewed-by: Greg Kroah-Hartman Change-Id: Ic145700e95cd93f23fa8574f695a967f0839b624 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 5b75a004167422f1f4a8af767bd4239e53b3348c) Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/f81232.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c index 0c7eacc630e0..11fe49543f26 100644 --- a/drivers/usb/serial/f81232.c +++ b/drivers/usb/serial/f81232.c @@ -130,9 +130,6 @@ static u8 const clock_table[] = { F81232_CLK_1_846_MHZ, F81232_CLK_14_77_MHZ, static int calc_baud_divisor(speed_t baudrate, speed_t clockrate) { - if (!baudrate) - return 0; - return DIV_ROUND_CLOSEST(clockrate, baudrate); } @@ -523,9 +520,14 @@ static void f81232_set_baudrate(struct tty_struct *tty, speed_t baud_list[] = { baudrate, old_baudrate, F81232_DEF_BAUDRATE }; for (i = 0; i < ARRAY_SIZE(baud_list); ++i) { - idx = f81232_find_clk(baud_list[i]); + baudrate = baud_list[i]; + if (baudrate == 0) { + tty_encode_baud_rate(tty, 0, 0); + return; + } + + idx = f81232_find_clk(baudrate); if (idx >= 0) { - baudrate = baud_list[i]; tty_encode_baud_rate(tty, baudrate, baudrate); break; } From 4fa772e75720213315fc125523d0afa53e910551 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Nov 2022 15:18:19 +0100 Subject: [PATCH 1364/1477] UPSTREAM: USB: serial: f81534: fix division by zero on line-speed change commit 188c9c2e0c7f4ae864113f80c40bafb394062271 upstream. The driver leaves the line speed unchanged in case a requested speed is not supported. 
Make sure to handle the case where the current speed is B0 (hangup) without dividing by zero when determining the clock source. Fixes: 3aacac02f385 ("USB: serial: f81534: add high baud rate support") Cc: stable@vger.kernel.org # 4.16 Cc: Ji-Ze Hong (Peter Hong) Reviewed-by: Greg Kroah-Hartman Change-Id: I87d08ff845ec24adae387ce829e56ff34f85ddb6 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c8bf31a00f4f921259d950f1984fbea518eba557) Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/f81534.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/f81534.c b/drivers/usb/serial/f81534.c index 5661fd03e545..e952be683c63 100644 --- a/drivers/usb/serial/f81534.c +++ b/drivers/usb/serial/f81534.c @@ -538,9 +538,6 @@ static int f81534_submit_writer(struct usb_serial_port *port, gfp_t mem_flags) static u32 f81534_calc_baud_divisor(u32 baudrate, u32 clockrate) { - if (!baudrate) - return 0; - /* Round to nearest divisor */ return DIV_ROUND_CLOSEST(clockrate, baudrate); } @@ -570,9 +567,14 @@ static int f81534_set_port_config(struct usb_serial_port *port, u32 baud_list[] = {baudrate, old_baudrate, F81534_DEFAULT_BAUD_RATE}; for (i = 0; i < ARRAY_SIZE(baud_list); ++i) { - idx = f81534_find_clk(baud_list[i]); + baudrate = baud_list[i]; + if (baudrate == 0) { + tty_encode_baud_rate(tty, 0, 0); + return 0; + } + + idx = f81534_find_clk(baudrate); if (idx >= 0) { - baudrate = baud_list[i]; tty_encode_baud_rate(tty, baudrate, baudrate); break; } From 001a013e8493092c47f179e7c1c7d337f6ed043c Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Wed, 30 Nov 2022 11:19:40 +0200 Subject: [PATCH 1365/1477] UPSTREAM: xhci: Apply XHCI_RESET_TO_DEFAULT quirk to ADL-N commit fed70b61ef2c0aed54456db3d485b215f6cc3209 upstream. ADL-N systems have the same issue as ADL-P, where a large boot firmware delay is seen if USB ports are left in U3 at shutdown. So apply the XHCI_RESET_TO_DEFAULT quirk to ADL-N as well. 
This patch depends on commit 34cd2db408d5 ("xhci: Add quirk to reset host back to default state at shutdown"). The issue it fixes is a ~20s boot time delay when booting from S5. It affects ADL-N devices, and ADL-N support was added starting from v5.16. Cc: stable@vger.kernel.org Change-Id: I320c75edae9b2367fcb6b7abcaa8c70249abb228 Signed-off-by: Reka Norman Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20221130091944.2171610-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9ff7aff40ef5d5d6b271bf94fb0903c5a550f557) Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 0ee11a937011..9168b492c02b 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -59,6 +59,7 @@ #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed +#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_PCH_XHCI 0x54ed #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba @@ -242,7 +243,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_MISSING_CAS; if (pdev->vendor == PCI_VENDOR_ID_INTEL && - pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI) + (pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_PCH_XHCI)) xhci->quirks |= XHCI_RESET_TO_DEFAULT; if (pdev->vendor == PCI_VENDOR_ID_INTEL && From 228253f43ff4a4cbaff23e12fc4f99a9f03e62cb Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Mon, 12 Dec 2022 11:00:31 -0800 Subject: [PATCH 1366/1477] UPSTREAM: igb: Initialize mailbox message for VF reset commit de5dc44370fbd6b46bd7f1a1e00369be54a041c8 upstream. 
When a MAC address is not assigned to the VF, that portion of the message sent to the VF is not set. The memory, however, is allocated from the stack meaning that information may be leaked to the VM. Initialize the message buffer to 0 so that no information is passed to the VM in this case. Fixes: 6ddbc4cf1f4d ("igb: Indicate failure on vf reset for empty mac address") Reported-by: Akihiko Odaki Change-Id: I71c8a33db5873854b642e61ec35bc8ffcfcd410a Signed-off-by: Tony Nguyen Reviewed-by: Akihiko Odaki Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221212190031.3983342-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f2479c3daaabccbac6c343a737615d0c595c6dc4) Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 327196d15a6a..f24f1a8ec2fb 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7416,7 +7416,7 @@ static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) { struct e1000_hw *hw = &adapter->hw; unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; - u32 reg, msgbuf[3]; + u32 reg, msgbuf[3] = {}; u8 *addr = (u8 *)(&msgbuf[1]); /* process all the same items cleared in a function level reset */ From 29cde746b8ea7d80651fb2adaefeca0a5434b42d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 25 Nov 2020 23:37:45 +0100 Subject: [PATCH 1367/1477] UPSTREAM: HID: ite: Add support for Acer S1002 keyboard-dock [ Upstream commit c961facb5b19634eee5bcdd91fc5bf3f1c545bc5 ] Make the hid-ite driver handle the Acer S1002 keyboard-dock, this leads to 2 improvements: 1. The non working wifi-toggle hotkey now works. 2. Toggling the touchpad on/off with the hotkey will now show OSD notifications in e.g. GNOME3.
The actual toggling is handled inside the keyboard, this adds support for notifying evdev listeners about this. Change-Id: Ic18321b9cec3d09102e948af48315c64c0cb784a Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina Stable-dep-of: 9ad6645a9dce ("HID: ite: Enable QUIRK_TOUCHPAD_ON_OFF_REPORT on Acer Aspire Switch V 10") Signed-off-by: Sasha Levin (cherry picked from commit a20b5eec0742396531cdcc2b6d655ed032a75d35) Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-ite.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index c5ff3d29bd44..2084c5af3c27 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1155,6 +1155,7 @@ #define USB_DEVICE_ID_SYNAPTICS_DELL_K12A 0x2819 #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012 0x2968 #define USB_DEVICE_ID_SYNAPTICS_TP_V103 0x5710 +#define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002 0x73f4 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003 0x73f5 #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5 0x81a7 diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 742c052b0110..22bfbebceaf4 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -18,10 +18,16 @@ static __u8 *ite_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); if (quirks & QUIRK_TOUCHPAD_ON_OFF_REPORT) { + /* For Acer Aspire Switch 10 SW5-012 keyboard-dock */ if (*rsize == 188 && rdesc[162] == 0x81 && rdesc[163] == 0x02) { - hid_info(hdev, "Fixing up ITE keyboard report descriptor\n"); + hid_info(hdev, "Fixing up Acer Sw5-012 ITE keyboard report descriptor\n"); rdesc[163] = HID_MAIN_ITEM_RELATIVE; } + /* For Acer One S1002 keyboard-dock */ + if (*rsize == 188 && rdesc[185] == 0x81 && rdesc[186] == 0x02) { + hid_info(hdev, "Fixing up Acer S1002 ITE keyboard report descriptor\n"); + rdesc[186] = HID_MAIN_ITEM_RELATIVE; + } } return rdesc; @@ -101,6 +107,11 @@ 
static const struct hid_device_id ite_devices[] = { USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012), .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */ + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002), + .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, + /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003) }, From 784df646aa711ba6a78a85c69864aa09bb810a8e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 6 Feb 2021 21:53:27 +0100 Subject: [PATCH 1368/1477] UPSTREAM: HID: ite: Enable QUIRK_TOUCHPAD_ON_OFF_REPORT on Acer Aspire Switch 10E [ Upstream commit b7c20f3815985570ac71c39b1a3e68c201109578 ] The Acer Aspire Switch 10E (SW3-016)'s keyboard-dock uses the same USB-ids as the Acer One S1003 keyboard-dock. Yet they are not entirely the same: 1. The S1003 keyboard-dock has the same report descriptors as the S1002 keyboard-dock (which has different USB-ids) 2. The Acer Aspire Switch 10E's keyboard-dock has different report descriptors from the S1002/S1003 keyboard docks and it sends 0x00880078 / 0x00880079 usage events when the touchpad is toggled on/off (which is handled internally). This means that all Acer kbd-docks handled by the hid-ite.c drivers report their touchpad being toggled on/off through these custom usage-codes with the exception of the S1003 dock, which likely is a bug of that dock. Add a QUIRK_TOUCHPAD_ON_OFF_REPORT quirk for the Aspire Switch 10E / S1003 usb-id so that the touchpad toggling will get reported to userspace on the Aspire Switch 10E. Since the Aspire Switch 10E's kbd-dock has different report-descriptors, this also requires adding support for fixing those to ite_report_fixup(). Setting the quirk will also cause ite_report_fixup() to hit the S1002/S1003 descriptors path on the S1003. 
Since the S1003 kbd-dock never generates any input-reports for the fixed up part of the descriptors this does not matter; and if there are versions out there which do actually send input-reports for the touchpad-toggle then the fixup should actually help to make things work. This was tested on both an Acer Aspire Switch 10E and on an Acer One S1003. Change-Id: Ic30f2f4d30225aac0e32d12d9a98091c3dfd8efe Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina Stable-dep-of: 9ad6645a9dce ("HID: ite: Enable QUIRK_TOUCHPAD_ON_OFF_REPORT on Acer Aspire Switch V 10") Signed-off-by: Sasha Levin (cherry picked from commit 263a1782a618c1206e11932d4236dc3f30f856f2) Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ite.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 22bfbebceaf4..14fc068affad 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -23,11 +23,16 @@ static __u8 *ite_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int hid_info(hdev, "Fixing up Acer Sw5-012 ITE keyboard report descriptor\n"); rdesc[163] = HID_MAIN_ITEM_RELATIVE; } - /* For Acer One S1002 keyboard-dock */ + /* For Acer One S1002/S1003 keyboard-dock */ if (*rsize == 188 && rdesc[185] == 0x81 && rdesc[186] == 0x02) { - hid_info(hdev, "Fixing up Acer S1002 ITE keyboard report descriptor\n"); + hid_info(hdev, "Fixing up Acer S1002/S1003 ITE keyboard report descriptor\n"); rdesc[186] = HID_MAIN_ITEM_RELATIVE; } + /* For Acer Aspire Switch 10E (SW3-016) keyboard-dock */ + if (*rsize == 210 && rdesc[184] == 0x81 && rdesc[185] == 0x02) { + hid_info(hdev, "Fixing up Acer Aspire Switch 10E (SW3-016) ITE keyboard report descriptor\n"); + rdesc[185] = HID_MAIN_ITEM_RELATIVE; + } } return rdesc; @@ -114,7 +119,8 @@ static const struct hid_device_id ite_devices[] = { /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. 
*/ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_SYNAPTICS, - USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003) }, + USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003), + .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); From fa335f5bb93ec5db6f0bcbce2168dfce203c2d3f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 8 Nov 2022 16:13:50 +0100 Subject: [PATCH 1369/1477] UPSTREAM: HID: ite: Enable QUIRK_TOUCHPAD_ON_OFF_REPORT on Acer Aspire Switch V 10 [ Upstream commit 9ad6645a9dce4d0e42daca6ebf32a154401c59d3 ] The Acer Aspire Switch V 10 (SW5-017)'s keyboard-dock uses the same ITE controller setup as other Acer Switch 2-in-1's. This needs special handling for the wifi on/off toggle hotkey as well as to properly report touchpad on/off keypresses. Add the USB-ids for the SW5-017's keyboard-dock with a quirk setting of QUIRK_TOUCHPAD_ON_OFF_REPORT to fix both issues. Cc: Rudolf Polzer Change-Id: I2e6a1cc178cfd1a23a856d3c630addff559532a0 Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin (cherry picked from commit 1d5db0c322ae885bd18b73802ce34750c633adcf) Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-ite.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 2084c5af3c27..2b7f6ea26761 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1157,6 +1157,7 @@ #define USB_DEVICE_ID_SYNAPTICS_TP_V103 0x5710 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002 0x73f4 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003 0x73f5 +#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017 0x73f6 #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5 0x81a7 #define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047 diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 14fc068affad..b8cce9c196d8 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -121,6 +121,11 @@ static const struct hid_device_id ite_devices[] = { 
USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003), .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, + /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */ + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017), + .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); From 5278199031faf0e88e3def4a57cfed0ffccf430d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 10 Nov 2022 18:40:56 +0100 Subject: [PATCH 1370/1477] UPSTREAM: HID: uclogic: Add HID_QUIRK_HIDINPUT_FORCE quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3405a4beaaa852f3ed2a5eb3b5149932d5c3779b ] Commit f7d8e387d9ae ("HID: uclogic: Switch to Digitizer usage for styluses") changed the usage used in UCLogic from "Pen" to "Digitizer". However, the IS_INPUT_APPLICATION() macro evaluates to false for HID_DG_DIGITIZER causing issues with the XP-Pen Star G640 tablet. Add the HID_QUIRK_HIDINPUT_FORCE quirk to bypass the IS_INPUT_APPLICATION() check. Reported-by: Torge Matthies Reported-by: Alexander Zhang Tested-by: Alexander Zhang Change-Id: Ie08835706d75b43c9d8b04b81c501f960c60c380 Signed-off-by: José Expósito Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin (cherry picked from commit 7c3a523c9b19ba94c3cade5b2d4612bedcf22e21) Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-uclogic-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 4edb24195704..e4811d37ca77 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -172,6 +172,7 @@ static int uclogic_probe(struct hid_device *hdev, * than the pen, so use QUIRK_MULTI_INPUT for all tablets. 
*/ hdev->quirks |= HID_QUIRK_MULTI_INPUT; + hdev->quirks |= HID_QUIRK_HIDINPUT_FORCE; /* Allocate and assign driver data */ drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); From 4373e5def3ce02f3954c7b249a1d6e19dda35b26 Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Fri, 18 Nov 2022 15:01:47 -0500 Subject: [PATCH 1371/1477] UPSTREAM: Bluetooth: L2CAP: Fix u8 overflow [ Upstream commit bcd70260ef56e0aee8a4fc6cd214a419900b0765 ] By continually sending L2CAP_CONF_REQ packets, chan->num_conf_rsp increases multiple times and eventually it will wrap around the maximum number (i.e., 255). This patch prevents this by adding a boundary check with L2CAP_CONF_MAX_CONF_RSP. Btmon log: Bluetooth monitor ver 5.64 = Note: Linux version 6.1.0-rc2 (x86_64) 0.264594 = Note: Bluetooth subsystem version 2.22 0.264636 @ MGMT Open: btmon (privileged) version 1.22 {0x0001} 0.272191 = New Index: 00:00:00:00:00:00 (Primary,Virtual,hci0) [hci0] 13.877604 @ RAW Open: 9496 (privileged) version 2.22 {0x0002} 13.890741 = Open Index: 00:00:00:00:00:00 [hci0] 13.900426 (...) > ACL Data RX: Handle 200 flags 0x00 dlen 1033 #32 [hci0] 14.273106 invalid packet size (12 != 1033) 08 00 01 00 02 01 04 00 01 10 ff ff ............ > ACL Data RX: Handle 200 flags 0x00 dlen 1547 #33 [hci0] 14.273561 invalid packet size (14 != 1547) 0a 00 01 00 04 01 06 00 40 00 00 00 00 00 ........@..... > ACL Data RX: Handle 200 flags 0x00 dlen 2061 #34 [hci0] 14.274390 invalid packet size (16 != 2061) 0c 00 01 00 04 01 08 00 40 00 00 00 00 00 00 04 ........@....... > ACL Data RX: Handle 200 flags 0x00 dlen 2061 #35 [hci0] 14.274932 invalid packet size (16 != 2061) 0c 00 01 00 04 01 08 00 40 00 00 00 07 00 03 00 ........@....... = bluetoothd: Bluetooth daemon 5.43 14.401828 > ACL Data RX: Handle 200 flags 0x00 dlen 1033 #36 [hci0] 14.275753 invalid packet size (12 != 1033) 08 00 01 00 04 01 04 00 40 00 00 00 ........@...
Change-Id: Id3b87180787bdded28e876ed0500ca6156579fb8 Signed-off-by: Sungwoo Kim Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin (cherry picked from commit f3fe6817156a2ad4b06f01afab04638a34d7c9a6) Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index e531797b6523..e851f8047f74 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4449,7 +4449,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, chan->ident = cmd->ident; l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); - chan->num_conf_rsp++; + if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP) + chan->num_conf_rsp++; /* Reset config buffer. */ chan->conf_len = 0; From 8e993eabeb64f19dec03b32e34677a69d48bf1e3 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 23 Nov 2022 15:18:28 +0100 Subject: [PATCH 1372/1477] UPSTREAM: net: loopback: use NET_NAME_PREDICTABLE for name_assign_type [ Upstream commit 31d929de5a112ee1b977a89c57de74710894bbbf ] When the name_assign_type attribute was introduced (commit 685343fc3ba6, "net: add name_assign_type netdev attribute"), the loopback device was explicitly mentioned as one which would make use of NET_NAME_PREDICTABLE: The name_assign_type attribute gives hints where the interface name of a given net-device comes from. These values are currently defined: ... NET_NAME_PREDICTABLE: The ifname has been assigned by the kernel in a predictable way that is guaranteed to avoid reuse and always be the same for a given device. Examples include statically created devices like the loopback device [...] Switch to that so that reading /sys/class/net/lo/name_assign_type produces something sensible instead of returning -EINVAL. Change-Id: I7add5ddb0dccfae0571609dd7cb6358c2b59d2a5 Signed-off-by: Rasmus Villemoes Reviewed-by: Jacob Keller Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin (cherry picked from commit eec1c3ade48b03d381b9e4a787970d8ddaf1c0b6) Signed-off-by: Greg Kroah-Hartman --- drivers/net/loopback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index a1c77cc00416..498e5c8013ef 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -208,7 +208,7 @@ static __net_init int loopback_net_init(struct net *net) int err; err = -ENOMEM; - dev = alloc_netdev(0, "lo", NET_NAME_UNKNOWN, loopback_setup); + dev = alloc_netdev(0, "lo", NET_NAME_PREDICTABLE, loopback_setup); if (!dev) goto out; From 463a74a83b8403121431f635a397ef5ef9f5c787 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 4 Mar 2021 12:21:05 -0700 Subject: [PATCH 1373/1477] UPSTREAM: kernel: provide create_io_thread() helper [ Upstream commit cc440e8738e5c875297ac0e90316745093be7e28 ] Provide a generic helper for setting up an io_uring worker. Returns a task_struct so that the caller can do whatever setup is needed, then call wake_up_new_task() to kick it into gear. Add a kernel_clone_args member, io_thread, which tells copy_process() to mark the task with PF_IO_WORKER. 
Change-Id: I670f155fc4ac1b93824391292f4822e32671215b Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1500fed00878fd59b2d6a1832b8d3f7c261a5671) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- include/linux/sched/task.h | 2 ++ kernel/fork.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index df9505faed27..5629761d9790 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -31,6 +31,7 @@ struct kernel_clone_args { /* Number of elements in *set_tid */ size_t set_tid_size; int cgroup; + int io_thread; struct cgroup *cgrp; struct css_set *cset; }; @@ -85,6 +86,7 @@ extern void exit_files(struct task_struct *); extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); +struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); diff --git a/kernel/fork.c b/kernel/fork.c index d515aa5b7eb5..563798b75acd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1995,6 +1995,8 @@ static __latent_entropy struct task_struct *copy_process( p = dup_task_struct(current, node); if (!p) goto fork_out; + if (args->io_thread) + p->flags |= PF_IO_WORKER; cpufreq_task_times_init(p); @@ -2474,6 +2476,34 @@ struct mm_struct *copy_init_mm(void) return dup_mm(NULL, &init_mm); } +/* + * This is like kernel_clone(), but shaved down and tailored to just + * creating io_uring workers. It returns a created task, or an error pointer. + * The returned task is inactive, and the caller must fire it up through + * wake_up_new_task(p). All signals are blocked in the created task. 
+ */ +struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) +{ + unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| + CLONE_IO; + struct kernel_clone_args args = { + .flags = ((lower_32_bits(flags) | CLONE_VM | + CLONE_UNTRACED) & ~CSIGNAL), + .exit_signal = (lower_32_bits(flags) & CSIGNAL), + .stack = (unsigned long)fn, + .stack_size = (unsigned long)arg, + .io_thread = 1, + }; + struct task_struct *tsk; + + tsk = copy_process(NULL, 0, node, &args); + if (!IS_ERR(tsk)) { + sigfillset(&tsk->blocked); + sigdelsetmask(&tsk->blocked, sigmask(SIGKILL)); + } + return tsk; +} + /* * Ok, this is the main fork-routine. * From ee44bd07c45dce0b7233d74daf86c2ebd0bbc0fa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 10 Sep 2021 11:18:36 -0600 Subject: [PATCH 1374/1477] UPSTREAM: iov_iter: add helper to save iov_iter state [ Upstream commit 8fb0f47a9d7acf620d0fd97831b69da9bc5e22ed ] In an ideal world, when someone is passed an iov_iter and returns X bytes, then X bytes would have been consumed/advanced from the iov_iter. But we have use cases that always consume the entire iterator, a few examples of that are iomap and bdev O_DIRECT. This means we cannot rely on the state of the iov_iter once we've called ->read_iter() or ->write_iter(). This would be easier if we didn't always have to deal with truncate of the iov_iter, as rewinding would be trivial without that. We recently added a commit to track the truncate state, but that grew the iov_iter by 8 bytes and wasn't the best solution. Implement a helper to save enough of the iov_iter state to sanely restore it after we've called the read/write iterator helpers. This currently only works for IOVEC/BVEC/KVEC as that's all we need, support for other iterator types are left as an exercise for the reader. 
Link: https://lore.kernel.org/linux-fsdevel/CAHk-=wiacKV4Gh-MYjteU0LwNBSGpWrK-Ov25HdqB1ewinrFPg@mail.gmail.com/ Bug: 268174392 Change-Id: Iab4de49932dea2823db03bcef673f726bcef4a9f Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit e86db87191d8f91dfe787758c6dd646c2bf0c335) Signed-off-by: Greg Kroah-Hartman --- include/linux/uio.h | 15 +++++++++++++ lib/iov_iter.c | 52 ++++++++++++++++++++++++++++----------------- 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 27ff8eb786dc..cedb68e49e4f 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -26,6 +26,12 @@ enum iter_type { ITER_DISCARD = 64, }; +struct iov_iter_state { + size_t iov_offset; + size_t count; + unsigned long nr_segs; +}; + struct iov_iter { /* * Bit 0 is the read/write bit, set if we're writing. @@ -55,6 +61,14 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i) return i->type & ~(READ | WRITE); } +static inline void iov_iter_save_state(struct iov_iter *iter, + struct iov_iter_state *state) +{ + state->iov_offset = iter->iov_offset; + state->count = iter->count; + state->nr_segs = iter->nr_segs; +} + static inline bool iter_is_iovec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_IOVEC; @@ -226,6 +240,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); +void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 1b0a349fbcd9..650554964f18 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1836,24 +1836,38 @@ int import_single_range(int rw, void __user *buf, size_t len, } EXPORT_SYMBOL(import_single_range); -int 
iov_iter_for_each_range(struct iov_iter *i, size_t bytes, - int (*f)(struct kvec *vec, void *context), - void *context) +/** + * iov_iter_restore() - Restore a &struct iov_iter to the same state as when + * iov_iter_save_state() was called. + * + * @i: &struct iov_iter to restore + * @state: state to restore from + * + * Used after iov_iter_save_state() to bring restore @i, if operations may + * have advanced it. + * + * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC + */ +void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) { - struct kvec w; - int err = -EINVAL; - if (!bytes) - return 0; - - iterate_all_kinds(i, bytes, v, -EINVAL, ({ - w.iov_base = kmap(v.bv_page) + v.bv_offset; - w.iov_len = v.bv_len; - err = f(&w, context); - kunmap(v.bv_page); - err;}), ({ - w = v; - err = f(&w, context);}) - ) - return err; + if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) && + !iov_iter_is_kvec(i)) + return; + i->iov_offset = state->iov_offset; + i->count = state->count; + /* + * For the *vec iters, nr_segs + iov is constant - if we increment + * the vec, then we also decrement the nr_segs count. Hence we don't + * need to track both of these, just one is enough and we can deduct + * the other from that. ITER_KVEC and ITER_IOVEC are the same struct + * size, so we can just increment the iov pointer as they are unionzed. + * ITER_BVEC _may_ be the same size on some archs, but on others it is + * not. Be safe and handle it separately. 
+ */ + BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); + if (iov_iter_is_bvec(i)) + i->bvec -= state->nr_segs - i->nr_segs; + else + i->iov -= state->nr_segs - i->nr_segs; + i->nr_segs = state->nr_segs; } -EXPORT_SYMBOL(iov_iter_for_each_range); From 72d2f4c1cdabb9715db99efec572882d5c72ecfa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 4 Jan 2021 00:08:41 -0500 Subject: [PATCH 1375/1477] UPSTREAM: saner calling conventions for unlazy_child() [ Upstream commit ae66db45fd309fd1c6d4e846dfc8414dfec7d6ad ] same as for the previous commit - instead of 0/-ECHILD make it return true/false, rename to try_to_unlazy_child(). Change-Id: Ie949437504bd8db7f22f78bfbe5d5141e9959cf8 Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 36ec31201a3da85a112dd368be04aee05c713459) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index ec9cec03e0f8..a7add241875e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -708,19 +708,19 @@ out: } /** - * unlazy_child - try to switch to ref-walk mode. + * try_to_unlazy_next - try to switch to ref-walk mode. * @nd: nameidata pathwalk data - * @dentry: child of nd->path.dentry - * @seq: seq number to check dentry against - * Returns: 0 on success, -ECHILD on failure + * @dentry: next dentry to step into + * @seq: seq number to check @dentry against + * Returns: true on success, false on failure * - * unlazy_child attempts to legitimize the current nd->path, nd->root and dentry - * for ref-walk mode. @dentry must be a path found by a do_lookup call on - * @nd. Must be called from rcu-walk context. - * Nothing should touch nameidata between unlazy_child() failure and + * Similar to to try_to_unlazy(), but here we have the next dentry already + * picked by rcu-walk and want to legitimize that in addition to the current + * nd->path and nd->root for ref-walk mode. 
Must be called from rcu-walk context. + * Nothing should touch nameidata between try_to_unlazy_next() failure and * terminate_walk(). */ -static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned seq) +static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq) { BUG_ON(!(nd->flags & LOOKUP_RCU)); @@ -750,7 +750,7 @@ static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned se if (unlikely(!legitimize_root(nd))) goto out_dput; rcu_read_unlock(); - return 0; + return true; out2: nd->path.mnt = NULL; @@ -758,11 +758,11 @@ out1: nd->path.dentry = NULL; out: rcu_read_unlock(); - return -ECHILD; + return false; out_dput: rcu_read_unlock(); dput(dentry); - return -ECHILD; + return false; } static inline int d_revalidate(struct dentry *dentry, unsigned int flags) @@ -1458,7 +1458,7 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, return -ENOENT; if (likely(__follow_mount_rcu(nd, path, inode, seqp))) return 0; - if (unlazy_child(nd, dentry, seq)) + if (!try_to_unlazy_next(nd, dentry, seq)) return -ECHILD; // *path might've been clobbered by __follow_mount_rcu() path->mnt = nd->path.mnt; @@ -1579,7 +1579,7 @@ static struct dentry *lookup_fast(struct nameidata *nd, status = d_revalidate(dentry, nd->flags); if (likely(status > 0)) return dentry; - if (unlazy_child(nd, dentry, seq)) + if (!try_to_unlazy_next(nd, dentry, seq)) return ERR_PTR(-ECHILD); if (unlikely(status == -ECHILD)) /* we'd been told to redo it in non-rcu mode */ From 7928a1689bf08846afc79132cfffce2b8cf92c22 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Dec 2020 09:19:09 -0700 Subject: [PATCH 1376/1477] UPSTREAM: fs: add support for LOOKUP_CACHED [ Upstream commit 6c6ec2b0a3e0381d886d531bd1471dfdb1509237 ] io_uring always punts opens to async context, since there's no control over whether the lookup blocks or not. 
Add LOOKUP_CACHED to support just doing the fast RCU based lookups, which we know will not block. If we can do a cached path resolution of the filename, then we don't have to always punt lookups for a worker. During path resolution, we always do LOOKUP_RCU first. If that fails and we terminate LOOKUP_RCU, then fail a LOOKUP_CACHED attempt as well. Cc: Al Viro Change-Id: If3c62e8681cd47bfafaa5a4de05a7e0418c1c718 Signed-off-by: Jens Axboe Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c1fe7bd3e1aa85865396b464b31f28b094a4353c) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 9 +++++++++ include/linux/namei.h | 1 + 2 files changed, 10 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index a7add241875e..414def462b99 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -689,6 +689,8 @@ static bool try_to_unlazy(struct nameidata *nd) BUG_ON(!(nd->flags & LOOKUP_RCU)); nd->flags &= ~LOOKUP_RCU; + if (nd->flags & LOOKUP_CACHED) + goto out1; if (unlikely(!legitimize_links(nd))) goto out1; if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) @@ -725,6 +727,8 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi BUG_ON(!(nd->flags & LOOKUP_RCU)); nd->flags &= ~LOOKUP_RCU; + if (nd->flags & LOOKUP_CACHED) + goto out2; if (unlikely(!legitimize_links(nd))) goto out2; if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq))) @@ -870,6 +874,7 @@ static int complete_walk(struct nameidata *nd) */ if (!(nd->flags & (LOOKUP_ROOT | LOOKUP_IS_SCOPED))) nd->root.mnt = NULL; + nd->flags &= ~LOOKUP_CACHED; if (!try_to_unlazy(nd)) return -ECHILD; } @@ -2288,6 +2293,10 @@ static const char *path_init(struct nameidata *nd, unsigned flags) int error; const char *s = nd->name->name; + /* LOOKUP_CACHED requires RCU, ask caller to retry */ + if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED) + return ERR_PTR(-EAGAIN); + if (!*s) flags &= ~LOOKUP_RCU; if (flags & LOOKUP_RCU) diff --git a/include/linux/namei.h 
b/include/linux/namei.h index a4bb992623c4..b9605b2b46e7 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -46,6 +46,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; #define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */ #define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ #define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ +#define LOOKUP_CACHED 0x200000 /* Only do cached lookup */ /* LOOKUP_* flags which do scope-related checks based on the dirfd. */ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) From eaf736aa7114d64e444cd381a4beb9abe2e05661 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 15 Feb 2021 12:03:23 -0500 Subject: [PATCH 1377/1477] UPSTREAM: fix handling of nd->depth on LOOKUP_CACHED failures in try_to_unlazy* [ Upstream commit eacd9aa8cedeb412842c7b339adbaa0477fdd5ad ] After switching to non-RCU mode, we want nd->depth to match the number of entries in nd->stack[] that need eventual path_put(). legitimize_links() takes care of that on failures; unfortunately, failure exits added for LOOKUP_CACHED do not. We could add the logics for that into those failure exits, both in try_to_unlazy() and in try_to_unlazy_next(), but since both checks are immediately followed by legitimize_links() and there's no calls of legitimize_links() other than those two... It's easier to move the check (and required handling of nd->depth on failure) into legitimize_links() itself. [caught by Jens: ... 
and since we are zeroing ->depth here, we need to do drop_links() first] Fixes: 6c6ec2b0a3e0 "fs: add support for LOOKUP_CACHED" Tested-by: Jens Axboe Change-Id: I6cf685bfce81acb4d68c3991b2a936968a39c739 Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 146fe79fff13fea7b5f3a9e913689e07fd4e6432) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 414def462b99..d1e95cd6b28b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -633,6 +633,11 @@ static inline bool legitimize_path(struct nameidata *nd, static bool legitimize_links(struct nameidata *nd) { int i; + if (unlikely(nd->flags & LOOKUP_CACHED)) { + drop_links(nd); + nd->depth = 0; + return false; + } for (i = 0; i < nd->depth; i++) { struct saved *last = nd->stack + i; if (unlikely(!legitimize_path(nd, &last->link, last->seq))) { @@ -689,8 +694,6 @@ static bool try_to_unlazy(struct nameidata *nd) BUG_ON(!(nd->flags & LOOKUP_RCU)); nd->flags &= ~LOOKUP_RCU; - if (nd->flags & LOOKUP_CACHED) - goto out1; if (unlikely(!legitimize_links(nd))) goto out1; if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) @@ -727,8 +730,6 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi BUG_ON(!(nd->flags & LOOKUP_RCU)); nd->flags &= ~LOOKUP_RCU; - if (nd->flags & LOOKUP_CACHED) - goto out2; if (unlikely(!legitimize_links(nd))) goto out2; if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq))) From 6b92128557b70edd54bbd93f12f524b8802dfeb9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 6 Apr 2021 12:33:07 -0400 Subject: [PATCH 1378/1477] UPSTREAM: Make sure nd->path.mnt and nd->path.dentry are always valid pointers [ Upstream commit 7d01ef7585c07afaf487759a48486228cd065726 ] Initialize them in set_nameidata() and make sure that terminate_walk() clears them once the pointers become potentially invalid (i.e. we leave RCU mode or drop them in non-RCU one). 
Currently we have "path_init() always initializes them and nobody accesses them outside of path_init()/terminate_walk() segments", which is asking for trouble. With that change we would have nd->path.{mnt,dentry} 1) always valid - NULL or pointing to currently allocated objects. 2) non-NULL while we are successfully walking 3) NULL when we are not walking at all 4) contributing to refcounts whenever non-NULL outside of RCU mode. Fixes: 6c6ec2b0a3e0 ("fs: add support for LOOKUP_CACHED") Reported-by: syzbot+c88a7030da47945a3cc3@syzkaller.appspotmail.com Tested-by: Christian Brauner Change-Id: I0532db6ea79fb760d50a88f75e2bb0691c24e93c Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0cf0ce8fb5b10d669072345ea855de112d0e0a43) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index d1e95cd6b28b..3c4bdd332ff6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -532,6 +532,8 @@ static void set_nameidata(struct nameidata *p, int dfd, struct filename *name) p->stack = p->internal; p->dfd = dfd; p->name = name; + p->path.mnt = NULL; + p->path.dentry = NULL; p->total_link_count = old ? old->total_link_count : 0; p->saved = old; current->nameidata = p; @@ -605,6 +607,8 @@ static void terminate_walk(struct nameidata *nd) rcu_read_unlock(); } nd->depth = 0; + nd->path.mnt = NULL; + nd->path.dentry = NULL; } /* path_put is needed afterwards regardless of success or failure */ @@ -2327,8 +2331,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags) } nd->root.mnt = NULL; - nd->path.mnt = NULL; - nd->path.dentry = NULL; /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). 
*/ if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) { From 2507b99d9a76940cab259a32048870ea2c37b58e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Dec 2020 09:19:10 -0700 Subject: [PATCH 1379/1477] UPSTREAM: fs: expose LOOKUP_CACHED through openat2() RESOLVE_CACHED [ Upstream commit 99668f618062816ca7ba639b007eb145b9d3d41e ] Now that we support non-blocking path resolution internally, expose it via openat2() in the struct open_how ->resolve flags. This allows applications using openat2() to limit path resolution to the extent that it is already cached. If the lookup cannot be satisfied in a non-blocking manner, openat2(2) will return -1/-EAGAIN. Cc: Al Viro Change-Id: Iddb58268e0a2b8adfc54e56192da43dda1868d8c Signed-off-by: Jens Axboe Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 5683caa7350f389d099b72bfdb289d2073286e32) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/open.c | 6 ++++++ include/linux/fcntl.h | 2 +- include/uapi/linux/openat2.h | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/open.c b/fs/open.c index b9d55ca3763b..93e85572b165 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1110,6 +1110,12 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) lookup_flags |= LOOKUP_BENEATH; if (how->resolve & RESOLVE_IN_ROOT) lookup_flags |= LOOKUP_IN_ROOT; + if (how->resolve & RESOLVE_CACHED) { + /* Don't bother even trying for create/truncate/tmpfile open */ + if (flags & (O_TRUNC | O_CREAT | O_TMPFILE)) + return -EAGAIN; + lookup_flags |= LOOKUP_CACHED; + } op->lookup_flags = lookup_flags; return 0; diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index 921e750843e6..766fcd973beb 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -19,7 +19,7 @@ /* List of all valid flags for the how->resolve argument: */ #define VALID_RESOLVE_FLAGS \ (RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS | \ - RESOLVE_BENEATH | RESOLVE_IN_ROOT) + 
RESOLVE_BENEATH | RESOLVE_IN_ROOT | RESOLVE_CACHED) /* List of all open_how "versions". */ #define OPEN_HOW_SIZE_VER0 24 /* sizeof first published struct */ diff --git a/include/uapi/linux/openat2.h b/include/uapi/linux/openat2.h index 58b1eb711360..a5feb7604948 100644 --- a/include/uapi/linux/openat2.h +++ b/include/uapi/linux/openat2.h @@ -35,5 +35,9 @@ struct open_how { #define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." be scoped inside the dirfd (similar to chroot(2)). */ +#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be + completed through cached lookup. May + return -EAGAIN if that's not + possible. */ #endif /* _UAPI_LINUX_OPENAT2_H */ From 9505ff1a81c5fb75648c7cfae648f8755685ff89 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:44:37 -0300 Subject: [PATCH 1380/1477] UPSTREAM: tools headers UAPI: Sync openat2.h with the kernel sources [ Upstream commit 1e61463cfcd0b3e7a19ba36b8a98c64ebaac5c6e ] To pick the changes in: 99668f618062816c ("fs: expose LOOKUP_CACHED through openat2() RESOLVE_CACHED") That don't result in any change in tooling, only silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/openat2.h' differs from latest version at 'include/uapi/linux/openat2.h' diff -u tools/include/uapi/linux/openat2.h include/uapi/linux/openat2.h Cc: Al Viro Cc: Jens Axboe Change-Id: I990de5703d50cb6aeceea1bcd7bf631b1f9c4484 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0b8cd5d814cb53d4109d2ba5e73dab38bf49a3b7) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- tools/include/uapi/linux/openat2.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h index 58b1eb711360..a5feb7604948 100644 --- a/tools/include/uapi/linux/openat2.h +++ b/tools/include/uapi/linux/openat2.h @@ -35,5 +35,9 @@ struct open_how { #define RESOLVE_IN_ROOT 0x10 /* Make all 
jumps to "/" and ".." be scoped inside the dirfd (similar to chroot(2)). */ +#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be + completed through cached lookup. May + return -EAGAIN if that's not + possible. */ #endif /* _UAPI_LINUX_OPENAT2_H */ From af091af9dbe3f8da14e44f80c4a22f4a8454a7e4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 5 Sep 2020 11:13:35 -0600 Subject: [PATCH 1381/1477] UPSTREAM: net: provide __sys_shutdown_sock() that takes a socket [ Upstream commit b713c195d59332277a31a59c91f755e53b5b302b ] No functional changes in this patch, needed to provide io_uring support for shutdown(2). Cc: netdev@vger.kernel.org Cc: David S. Miller Acked-by: Jakub Kicinski Change-Id: I57af3873e6d10fd244dcc8ed261d715a35a7434d Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 069ac28d92432dd7cdac0a2c141a1b3b8d4330d5) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- include/linux/socket.h | 1 + net/socket.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 9aa530d497da..42222a84167f 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -436,5 +436,6 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); +extern int __sys_shutdown_sock(struct socket *sock, int how); extern int __sys_shutdown(int fd, int how); #endif /* _LINUX_SOCKET_H */ diff --git a/net/socket.c b/net/socket.c index abca62588f21..85be3f256a04 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2182,6 +2182,17 @@ SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, * Shutdown a socket. 
*/ +int __sys_shutdown_sock(struct socket *sock, int how) +{ + int err; + + err = security_socket_shutdown(sock, how); + if (!err) + err = sock->ops->shutdown(sock, how); + + return err; +} + int __sys_shutdown(int fd, int how) { int err, fput_needed; @@ -2189,9 +2200,7 @@ int __sys_shutdown(int fd, int how) sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock != NULL) { - err = security_socket_shutdown(sock, how); - if (!err) - err = sock->ops->shutdown(sock, how); + err = __sys_shutdown_sock(sock, how); fput_light(sock->file, fput_needed); } return err; From 13f03f52752278fd629b4e102c35dccce26b61bb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 25 Aug 2021 12:25:44 +0100 Subject: [PATCH 1382/1477] UPSTREAM: net: add accept helper not installing fd [ Upstream commit d32f89da7fa8ccc8b3fb8f909d61e42b9bc39329 ] Introduce and reuse a helper that acts similarly to __sys_accept4_file() but returns struct file instead of installing file descriptor. Will be used by io_uring. Change-Id: I27ff49709d0c3332ca9b831a62fecc14f7ef3304 Signed-off-by: Pavel Begunkov Acked-by: Jakub Kicinski Signed-off-by: Jens Axboe Acked-by: David S. 
Miller Link: https://lore.kernel.org/r/c57b9e8e818d93683a3d24f8ca50ca038d1da8c4.1629888991.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit ad0b0137953a2c973958dadf6d222e120e278856) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- include/linux/socket.h | 3 ++ net/socket.c | 67 +++++++++++++++++++++++------------------- 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 42222a84167f..c3b35d18bcd3 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -421,6 +421,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags, unsigned long nofile); +extern struct file *do_accept(struct file *file, unsigned file_flags, + struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags); extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); diff --git a/net/socket.c b/net/socket.c index 85be3f256a04..16800b0ae2d3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1689,30 +1689,22 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog) return __sys_listen(fd, backlog); } -int __sys_accept4_file(struct file *file, unsigned file_flags, +struct file *do_accept(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags, - unsigned long nofile) + int __user *upeer_addrlen, int flags) { struct socket *sock, *newsock; struct file *newfile; - int err, len, newfd; + int err, len; struct sockaddr_storage address; - if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) - return -EINVAL; - - if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) - flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - sock = sock_from_file(file, &err); if (!sock) - goto out; + return 
ERR_PTR(err); - err = -ENFILE; newsock = sock_alloc(); if (!newsock) - goto out; + return ERR_PTR(-ENFILE); newsock->type = sock->type; newsock->ops = sock->ops; @@ -1723,18 +1715,9 @@ int __sys_accept4_file(struct file *file, unsigned file_flags, */ __module_get(newsock->ops->owner); - newfd = __get_unused_fd_flags(flags, nofile); - if (unlikely(newfd < 0)) { - err = newfd; - sock_release(newsock); - goto out; - } newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); - if (IS_ERR(newfile)) { - err = PTR_ERR(newfile); - put_unused_fd(newfd); - goto out; - } + if (IS_ERR(newfile)) + return newfile; err = security_socket_accept(sock, newsock); if (err) @@ -1759,16 +1742,38 @@ int __sys_accept4_file(struct file *file, unsigned file_flags, } /* File flags are not inherited via accept() unlike another OSes. */ - - fd_install(newfd, newfile); - err = newfd; -out: - return err; + return newfile; out_fd: fput(newfile); - put_unused_fd(newfd); - goto out; + return ERR_PTR(err); +} +int __sys_accept4_file(struct file *file, unsigned file_flags, + struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags, + unsigned long nofile) +{ + struct file *newfile; + int newfd; + + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; + + newfd = __get_unused_fd_flags(flags, nofile); + if (unlikely(newfd < 0)) + return newfd; + + newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen, + flags); + if (IS_ERR(newfile)) { + put_unused_fd(newfd); + return PTR_ERR(newfile); + } + fd_install(newfd, newfile); + return newfd; } /* From 661bc0f679c79ddce276b9a04e3bbf135ad3eddf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Oct 2020 14:32:27 -0600 Subject: [PATCH 1383/1477] UPSTREAM: signal: Add task_sigpending() helper [ Upstream commit 5c251e9dc0e127bac6fc5b8e6696363d2e35f515 ] This is in preparation for 
maintaining signal_pending() as the decider of whether or not a schedule() loop should be broken, or continue sleeping. This is different than the core signal use cases, which really need to know whether an actual signal is pending or not. task_sigpending() returns non-zero if TIF_SIGPENDING is set. Only core kernel use cases should care about the distinction between the two, make sure those use the task_sigpending() helper. Change-Id: I0d8572e173cc4536673da1682c9537db11f68167 Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20201026203230.386348-2-axboe@kernel.dk Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 52cfde6bbf64f7f058efa805c6dbb6332d4de6fa) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- include/linux/sched/signal.h | 9 +++++++-- kernel/events/uprobes.c | 2 +- kernel/signal.c | 8 ++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 02d53c6b574c..e291b3cb1de4 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -360,11 +360,16 @@ static inline int restart_syscall(void) return -ERESTARTNOINTR; } -static inline int signal_pending(struct task_struct *p) +static inline int task_sigpending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } +static inline int signal_pending(struct task_struct *p) +{ + return task_sigpending(p); +} + static inline int __fatal_signal_pending(struct task_struct *p) { return unlikely(sigismember(&p->pending.signal, SIGKILL)); @@ -372,7 +377,7 @@ static inline int __fatal_signal_pending(struct task_struct *p) static inline int fatal_signal_pending(struct task_struct *p) { - return signal_pending(p) && __fatal_signal_pending(p); + return task_sigpending(p) && __fatal_signal_pending(p); } static inline int signal_pending_state(long state, struct task_struct *p) diff --git 
a/kernel/events/uprobes.c b/kernel/events/uprobes.c index e1bbb3b92921..826a2355da1e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1973,7 +1973,7 @@ bool uprobe_deny_signal(void) WARN_ON_ONCE(utask->state != UTASK_SSTEP); - if (signal_pending(t)) { + if (task_sigpending(t)) { spin_lock_irq(&t->sighand->siglock); clear_tsk_thread_flag(t, TIF_SIGPENDING); spin_unlock_irq(&t->sighand->siglock); diff --git a/kernel/signal.c b/kernel/signal.c index c75a1d5f9d51..17e40f003830 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -987,7 +987,7 @@ static inline bool wants_signal(int sig, struct task_struct *p) if (task_is_stopped_or_traced(p)) return false; - return task_curr(p) || !signal_pending(p); + return task_curr(p) || !task_sigpending(p); } static void complete_signal(int sig, struct task_struct *p, enum pid_type type) @@ -2849,7 +2849,7 @@ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) /* Remove the signals this thread can handle. */ sigandsets(&retarget, &retarget, &t->blocked); - if (!signal_pending(t)) + if (!task_sigpending(t)) signal_wake_up(t, 0); if (sigisemptyset(&retarget)) @@ -2883,7 +2883,7 @@ void exit_signals(struct task_struct *tsk) cgroup_threadgroup_change_end(tsk); - if (!signal_pending(tsk)) + if (!task_sigpending(tsk)) goto out; unblocked = tsk->blocked; @@ -2927,7 +2927,7 @@ long do_no_restart_syscall(struct restart_block *param) static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) { - if (signal_pending(tsk) && !thread_group_empty(tsk)) { + if (task_sigpending(tsk) && !thread_group_empty(tsk)) { sigset_t newblocked; /* A set of now blocked but previously unblocked signals. 
*/ sigandnsets(&newblocked, newset, &current->blocked); From 98006a0a159f2c45ffb6a6393d9af390dde15a49 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 26 Sep 2020 17:20:17 -0600 Subject: [PATCH 1384/1477] UPSTREAM: fs: make do_renameat2() take struct filename [ Upstream commit e886663cfd029b64a1d8da7efae7014526d884e9 ] Pass in the struct filename pointers instead of the user string, and update the three callers to do the same. This behaves like do_unlinkat(), which also takes a filename struct and puts it when it is done. Converting callers is then trivial. Change-Id: Ie23f87f8c6bb18a61254a0848d861ad6fad14232 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 214f80e25176eb4d756bc9fe528ef7bf23d2f9a1) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/internal.h | 2 ++ fs/namei.c | 40 ++++++++++++++++++++++------------------ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 5155f6ce95c7..95a4c9d9692f 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -77,6 +77,8 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, long do_rmdir(int dfd, struct filename *name); long do_unlinkat(int dfd, struct filename *name); int may_linkat(struct path *link); +int do_renameat2(int olddfd, struct filename *oldname, int newdfd, + struct filename *newname, unsigned int flags); /* * namespace.c diff --git a/fs/namei.c b/fs/namei.c index 3c4bdd332ff6..887bc1f8be97 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4437,8 +4437,8 @@ out: } EXPORT_SYMBOL_NS(vfs_rename, ANDROID_GKI_VFS_EXPORT_ONLY); -static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, - const char __user *newname, unsigned int flags) +int do_renameat2(int olddfd, struct filename *from, int newdfd, + struct filename *to, unsigned int flags) { struct dentry *old_dentry, *new_dentry; struct dentry *trap; @@ -4446,32 +4446,30 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, struct
qstr old_last, new_last; int old_type, new_type; struct inode *delegated_inode = NULL; - struct filename *from; - struct filename *to; unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET; bool should_retry = false; - int error; + int error = -EINVAL; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) - return -EINVAL; + goto put_both; if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && (flags & RENAME_EXCHANGE)) - return -EINVAL; + goto put_both; if (flags & RENAME_EXCHANGE) target_flags = 0; retry: - from = filename_parentat(olddfd, getname(oldname), lookup_flags, - &old_path, &old_last, &old_type); + from = filename_parentat(olddfd, from, lookup_flags, &old_path, + &old_last, &old_type); if (IS_ERR(from)) { error = PTR_ERR(from); - goto exit; + goto put_new; } - to = filename_parentat(newdfd, getname(newname), lookup_flags, - &new_path, &new_last, &new_type); + to = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last, + &new_type); if (IS_ERR(to)) { error = PTR_ERR(to); goto exit1; @@ -4564,34 +4562,40 @@ exit2: if (retry_estale(error, lookup_flags)) should_retry = true; path_put(&new_path); - putname(to); exit1: path_put(&old_path); - putname(from); if (should_retry) { should_retry = false; lookup_flags |= LOOKUP_REVAL; goto retry; } -exit: +put_both: + if (!IS_ERR(from)) + putname(from); +put_new: + if (!IS_ERR(to)) + putname(to); return error; } SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags) { - return do_renameat2(olddfd, oldname, newdfd, newname, flags); + return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname), + flags); } SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname) { - return do_renameat2(olddfd, oldname, newdfd, newname, 0); + return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname), + 0); } SYSCALL_DEFINE2(rename, const char 
__user *, oldname, const char __user *, newname) { - return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); + return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD, + getname(newname), 0); } int readlink_copy(char __user *buffer, int buflen, const char *link) From 82c3becbef85c1c0f0290f09d2d9467acc149ab7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:40 -0600 Subject: [PATCH 1385/1477] UPSTREAM: file: Rename __close_fd_get_file close_fd_get_file [ Upstream commit 9fe83c43e71cdb8e5b9520bcb98706a2b3c680c8 ] The function close_fd_get_file is explicitly a variant of __close_fd[1]. Now that __close_fd has been renamed close_fd, rename close_fd_get_file to be consistent with close_fd. When __alloc_fd, __close_fd and __fd_install were introduced the double underscore indicated that the function took a struct files_struct parameter. The function __close_fd_get_file never has so the naming has always been inconsistent. This just cleans things up so there are not any lingering mentions or references __close_fd left in the code. [1] 80cd795630d6 ("binder: fix use-after-free due to ksys_close() during fdget()") Link: https://lkml.kernel.org/r/20201120231441.29911-23-ebiederm@xmission.com Change-Id: I1c759a36dfa09259eff5b09127fde4e041777a3e Signed-off-by: Eric W. 
Biederman Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 57b20530363d127ab6a82e336275769258eb5f37) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 +- fs/file.c | 4 ++-- fs/io_uring.c | 2 +- include/linux/fdtable.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d07a6d269776..16e0ba315abf 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2021,7 +2021,7 @@ static void binder_deferred_fd_close(int fd) if (!twcb) return; init_task_work(&twcb->twork, binder_do_fd_close); - __close_fd_get_file(fd, &twcb->file); + close_fd_get_file(fd, &twcb->file); if (twcb->file) { filp_close(twcb->file, current->files); task_work_add(current, &twcb->twork, TWA_RESUME); diff --git a/fs/file.c b/fs/file.c index 8431dfde036c..aacc5bf80bd6 100644 --- a/fs/file.c +++ b/fs/file.c @@ -780,11 +780,11 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) } /* - * variant of __close_fd that gets a ref on the file for later fput. + * variant of close_fd that gets a ref on the file for later fput. * The caller must ensure that filp_close() called on the file, and then * an fput(). */ -int __close_fd_get_file(unsigned int fd, struct file **res) +int close_fd_get_file(unsigned int fd, struct file **res) { struct files_struct *files = current->files; struct file *file; diff --git a/fs/io_uring.c b/fs/io_uring.c index 816c60a34ea1..661303a435be 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4270,7 +4270,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock, /* might be already done during nonblock submission */ if (!close->put_file) { - ret = __close_fd_get_file(close->fd, &close->put_file); + ret = close_fd_get_file(close->fd, &close->put_file); if (ret < 0) return (ret == -ENOENT) ? 
-EBADF : ret; } diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index a32bf47c593e..f1a99d3e5570 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -123,7 +123,7 @@ extern void __fd_install(struct files_struct *files, extern int __close_fd(struct files_struct *files, unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); -extern int __close_fd_get_file(unsigned int fd, struct file **res); +extern int close_fd_get_file(unsigned int fd, struct file **res); extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, struct files_struct **new_fdp); From 00af4b88ad4ee075ded32abcf9a32a294678ff7f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Jan 2021 15:41:52 -0700 Subject: [PATCH 1386/1477] UPSTREAM: fs: provide locked helper variant of close_fd_get_file() [ Upstream commit 53dec2ea74f2ef360e8455439be96a780baa6097 ] Assumes current->files->file_lock is already held on invocation. Helps the caller check the file before removing the fd, if it needs to. Change-Id: Idd87700a119403ce3867aa52294a14213b505faa Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit d2136fc145be417e851dfe50703fac2af6aabe46) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/file.c | 48 +++++++++++++++++++++++++++++++----------------- fs/internal.h | 1 + 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/fs/file.c b/fs/file.c index aacc5bf80bd6..97a0cd31faec 100644 --- a/fs/file.c +++ b/fs/file.c @@ -22,6 +22,8 @@ #include #include +#include "internal.h" + unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open_min = BITS_PER_LONG; /* our min() is unusable in constant expressions ;-/ */ @@ -779,6 +781,32 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) return 0; } +/* + * See close_fd_get_file() below, this variant assumes current->files->file_lock + * is held. 
+ */ +int __close_fd_get_file(unsigned int fd, struct file **res) +{ + struct files_struct *files = current->files; + struct file *file; + struct fdtable *fdt; + + fdt = files_fdtable(files); + if (fd >= fdt->max_fds) + goto out_err; + file = fdt->fd[fd]; + if (!file) + goto out_err; + rcu_assign_pointer(fdt->fd[fd], NULL); + __put_unused_fd(files, fd); + get_file(file); + *res = file; + return 0; +out_err: + *res = NULL; + return -ENOENT; +} + /* * variant of close_fd that gets a ref on the file for later fput. * The caller must ensure that filp_close() called on the file, and then @@ -787,27 +815,13 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) int close_fd_get_file(unsigned int fd, struct file **res) { struct files_struct *files = current->files; - struct file *file; - struct fdtable *fdt; + int ret; spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (fd >= fdt->max_fds) - goto out_unlock; - file = fdt->fd[fd]; - if (!file) - goto out_unlock; - rcu_assign_pointer(fdt->fd[fd], NULL); - __put_unused_fd(files, fd); + ret = __close_fd_get_file(fd, res); spin_unlock(&files->file_lock); - get_file(file); - *res = file; - return 0; -out_unlock: - spin_unlock(&files->file_lock); - *res = NULL; - return -ENOENT; + return ret; } void do_close_on_exec(struct files_struct *files) diff --git a/fs/internal.h b/fs/internal.h index 95a4c9d9692f..06d313b9beec 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -134,6 +134,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, const char *, const struct open_flags *); extern struct open_how build_open_how(int flags, umode_t mode); extern int build_open_flags(const struct open_how *how, struct open_flags *op); +extern int __close_fd_get_file(unsigned int fd, struct file **res); long do_sys_ftruncate(unsigned int fd, loff_t length, int small); int chmod_common(const struct path *path, umode_t mode); From a14b0287222a1bfeac3526b44463863ec611f676 Mon Sep 17 00:00:00 2001 
From: Jens Axboe Date: Mon, 26 Oct 2020 14:32:28 -0600 Subject: [PATCH 1387/1477] UPSTREAM: entry: Add support for TIF_NOTIFY_SIGNAL [ Upstream commit 12db8b690010ccfadf9d0b49a1e1798e47dbbe1a ] Add TIF_NOTIFY_SIGNAL handling in the generic entry code, which if set, will return true if signal_pending() is used in a wait loop. That causes an exit of the loop so that notify_signal tracehooks can be run. If the wait loop is currently inside a system call, the system call is restarted once task_work has been processed. In preparation for only having arch_do_signal() handle syscall restarts if _TIF_SIGPENDING isn't set, rename it to arch_do_signal_or_restart(). Pass in a boolean that tells the architecture specific signal handler if it should attempt to get a signal, or just process a potential syscall restart. For !CONFIG_GENERIC_ENTRY archs, add the TIF_NOTIFY_SIGNAL handling to get_signal(). This is done to minimize the needed architecture changes to support this feature. Change-Id: Iec8202baf6ec6ff5d31c339869d8f34af4182677 Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20201026203230.386348-3-axboe@kernel.dk Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3c295bd2ddaecf3509458c86bf7ba610042f3609) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/signal.c | 4 ++-- include/linux/entry-common.h | 11 ++++++++--- include/linux/entry-kvm.h | 4 ++-- include/linux/sched/signal.h | 11 ++++++++++- include/linux/tracehook.h | 27 +++++++++++++++++++++++++++ kernel/entry/common.c | 14 +++++++++++--- kernel/entry/kvm.c | 3 +++ kernel/signal.c | 14 ++++++++++++++ 8 files changed, 77 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b001ba811cab..9eff48171532 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -798,11 +798,11 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * want 
to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ -void arch_do_signal(struct pt_regs *regs) +void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { struct ksignal ksig; - if (get_signal(&ksig)) { + if (has_signal && get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ handle_signal(&ksig, regs); return; diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 7dff07713a07..1a128baf3628 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -37,6 +37,10 @@ # define _TIF_UPROBE (0) #endif +#ifndef _TIF_NOTIFY_SIGNAL +# define _TIF_NOTIFY_SIGNAL (0) +#endif + /* * TIF flags handled in syscall_enter_from_user_mode() */ @@ -69,7 +73,7 @@ #define EXIT_TO_USER_MODE_WORK \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \ + _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ARCH_EXIT_TO_USER_MODE_WORK) /** @@ -259,12 +263,13 @@ static __always_inline void arch_exit_to_user_mode(void) { } #endif /** - * arch_do_signal - Architecture specific signal delivery function + * arch_do_signal_or_restart - Architecture specific signal delivery function * @regs: Pointer to currents pt_regs + * @has_signal: actual signal to handle * * Invoked from exit_to_user_mode_loop(). 
*/ -void arch_do_signal(struct pt_regs *regs); +void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal); /** * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit() diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 0cef17afb41a..9b93f8584ff7 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -11,8 +11,8 @@ # define ARCH_XFER_TO_GUEST_MODE_WORK (0) #endif -#define XFER_TO_GUEST_MODE_WORK \ - (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ +#define XFER_TO_GUEST_MODE_WORK \ + (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \ _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) struct kvm_vcpu; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index e291b3cb1de4..0ca6f248dc4f 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -367,6 +367,15 @@ static inline int task_sigpending(struct task_struct *p) static inline int signal_pending(struct task_struct *p) { +#if defined(TIF_NOTIFY_SIGNAL) + /* + * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same + * behavior in terms of ensuring that we break out of wait loops + * so that notify signal callbacks can be processed. 
+ */ + if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) + return 1; +#endif return task_sigpending(p); } @@ -514,7 +523,7 @@ extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize); static inline void restore_saved_sigmask_unless(bool interrupted) { if (interrupted) - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); + WARN_ON(!signal_pending(current)); else restore_saved_sigmask(); } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b480e1a07ed8..f7d82e4fafd6 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -198,4 +198,31 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) blkcg_maybe_throttle_current(); } +/* + * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This + * is currently used by TWA_SIGNAL based task_work, which requires breaking + * wait loops to ensure that task_work is noticed and run. + */ +static inline void tracehook_notify_signal(void) +{ +#if defined(TIF_NOTIFY_SIGNAL) + clear_thread_flag(TIF_NOTIFY_SIGNAL); + smp_mb__after_atomic(); + if (current->task_works) + task_work_run(); +#endif +} + +/* + * Called when we have work to process from exit_to_user_mode_loop() + */ +static inline void set_notify_signal(struct task_struct *task) +{ +#if defined(TIF_NOTIFY_SIGNAL) + if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && + !wake_up_state(task, TASK_INTERRUPTIBLE)) + kick_process(task); +#endif +} + #endif /* <linux/tracehook.h> */ diff --git a/kernel/entry/common.c b/kernel/entry/common.c index e289e6773292..09f58853f692 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -135,7 +135,15 @@ static __always_inline void exit_to_user_mode(void) } /* Workaround to allow gradual conversion of architecture code */ -void __weak arch_do_signal(struct pt_regs *regs) { } +void __weak arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { } + +static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work) +{ + if (ti_work &
_TIF_NOTIFY_SIGNAL) + tracehook_notify_signal(); + + arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING); +} static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work) @@ -157,8 +165,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, if (ti_work & _TIF_PATCH_PENDING) klp_update_patch_state(current); - if (ti_work & _TIF_SIGPENDING) - arch_do_signal(regs); + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) + handle_signal_work(regs, ti_work); if (ti_work & _TIF_NOTIFY_RESUME) { tracehook_notify_resume(regs); diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 2a3139dab109..049fd06b4c3d 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -8,6 +8,9 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) do { int ret; + if (ti_work & _TIF_NOTIFY_SIGNAL) + tracehook_notify_signal(); + if (ti_work & _TIF_SIGPENDING) { kvm_handle_signal_exit(vcpu); return -EINTR; diff --git a/kernel/signal.c b/kernel/signal.c index 17e40f003830..6eb4b9da515d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2552,6 +2552,20 @@ bool get_signal(struct ksignal *ksig) struct signal_struct *signal = current->signal; int signr; + /* + * For non-generic architectures, check for TIF_NOTIFY_SIGNAL so + * that the arch handlers don't all have to do it. If we get here + * without TIF_SIGPENDING, just exit after running signal work. 
+ */ +#ifdef TIF_NOTIFY_SIGNAL + if (!IS_ENABLED(CONFIG_GENERIC_ENTRY)) { + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) + tracehook_notify_signal(); + if (!task_sigpending(current)) + return false; + } +#endif + if (unlikely(uprobe_deny_signal())) return false; From 862aa233e7d7536c1a5654f727f045ce98486149 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Oct 2020 14:32:30 -0600 Subject: [PATCH 1388/1477] UPSTREAM: task_work: Use TIF_NOTIFY_SIGNAL if available [ Upstream commit 114518eb6430b832d2f9f5a008043b913ccf0e24 ] If the arch supports TIF_NOTIFY_SIGNAL, then use that for TWA_SIGNAL as it's more efficient than using the signal delivery method. This is especially true on threaded applications, where ->sighand is shared across threads, but it's also lighter weight on non-shared cases. io_uring is a heavy consumer of TWA_SIGNAL based task_work. A test with threads shows a nice improvement running an io_uring based echo server. stock kernel: 0.01% <= 0.1 milliseconds 95.86% <= 0.2 milliseconds 98.27% <= 0.3 milliseconds 99.71% <= 0.4 milliseconds 100.00% <= 0.5 milliseconds 100.00% <= 0.6 milliseconds 100.00% <= 0.7 milliseconds 100.00% <= 0.8 milliseconds 100.00% <= 0.9 milliseconds 100.00% <= 1.0 milliseconds 100.00% <= 1.1 milliseconds 100.00% <= 2 milliseconds 100.00% <= 3 milliseconds 100.00% <= 3 milliseconds 1378930.00 requests per second ~1600% CPU 1.38M requests/second, and all 16 CPUs are maxed out. patched kernel: 0.01% <= 0.1 milliseconds 98.24% <= 0.2 milliseconds 99.47% <= 0.3 milliseconds 99.99% <= 0.4 milliseconds 100.00% <= 0.5 milliseconds 100.00% <= 0.6 milliseconds 100.00% <= 0.7 milliseconds 100.00% <= 0.8 milliseconds 100.00% <= 0.9 milliseconds 100.00% <= 1.2 milliseconds 1666111.38 requests per second ~1450% CPU 1.67M requests/second, and we're no longer just hammering on the sighand lock. The original reporter states: "For 5.7.15 my benchmark achieves 1.6M qps and system cpu is at ~80%. 
for 5.7.16 or later it achieves only 1M qps and the system cpu is at ~100%" with the only difference there being that TWA_SIGNAL is used unconditionally in 5.7.16, since it's required to be able to handle the inability to run task_work if the application is waiting in the kernel already on an event that needs task_work run to be satisfied. Also see commit 0ba9c9edcd15. Reported-by: Roman Gershman Change-Id: I80788634e6b91012ebf94e0ab6bf897c99f9f732 Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20201026203230.386348-5-axboe@kernel.dk Signed-off-by: Greg Kroah-Hartman (cherry picked from commit eb42e7b3043167d21f90204df75fa21b6d4af3ff) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- kernel/task_work.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/kernel/task_work.c b/kernel/task_work.c index e3a8e5c66ae5..2c47ce899ee8 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -5,6 +5,34 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ +/* + * TWA_SIGNAL signaling - use TIF_NOTIFY_SIGNAL, if available, as it's faster + * than TIF_SIGPENDING as there's no dependency on ->sighand. The latter is + * shared for threads, and can cause contention on sighand->lock. Even for + * the non-threaded case TIF_NOTIFY_SIGNAL is more efficient, as no locking + * or IRQ disabling is involved for notification (or running) purposes. + */ +static void task_work_notify_signal(struct task_struct *task) +{ +#if defined(TIF_NOTIFY_SIGNAL) + set_notify_signal(task); +#else + unsigned long flags; + + /* + * Only grab the sighand lock if we don't already have some + * task_work pending. This pairs with the smp_store_mb() + * in get_signal(), see comment there.
+ */ + if (!(READ_ONCE(task->jobctl) & JOBCTL_TASK_WORK) && + lock_task_sighand(task, &flags)) { + task->jobctl |= JOBCTL_TASK_WORK; + signal_wake_up(task, 0); + unlock_task_sighand(task, &flags); + } +#endif +} + /** * task_work_add - ask the @task to execute @work->func() * @task: the task which should run the callback @@ -33,7 +61,6 @@ int task_work_add(struct task_struct *task, struct callback_head *work, enum task_work_notify_mode notify) { struct callback_head *head; - unsigned long flags; /* record the work call stack in order to print it in KASAN reports */ kasan_record_aux_stack(work); @@ -52,17 +79,7 @@ int task_work_add(struct task_struct *task, struct callback_head *work, set_notify_resume(task); break; case TWA_SIGNAL: - /* - * Only grab the sighand lock if we don't already have some - * task_work pending. This pairs with the smp_store_mb() - * in get_signal(), see comment there. - */ - if (!(READ_ONCE(task->jobctl) & JOBCTL_TASK_WORK) && - lock_task_sighand(task, &flags)) { - task->jobctl |= JOBCTL_TASK_WORK; - signal_wake_up(task, 0); - unlock_task_sighand(task, &flags); - } + task_work_notify_signal(task); break; default: WARN_ON_ONCE(1); From 109ccff96d05451c81c03e985abaebce557e929c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Oct 2020 14:32:29 -0600 Subject: [PATCH 1389/1477] UPSTREAM: x86: Wire up TIF_NOTIFY_SIGNAL [ Upstream commit c8d5ed67936fddbe2ae845fc80397718006322d7 ] The generic entry code has support for TIF_NOTIFY_SIGNAL already. Just provide the TIF bit. 
[ tglx: Adapted to other TIF changes in x86 ] Change-Id: I3188c956ab5ed6abac721db8a12f3229f4c7d13c Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201026203230.386348-4-axboe@kernel.dk Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 4b1dcf8ec9b2f11b57f1ff5dcaa1f8575c7dacb5) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/thread_info.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e701f29b4881..012c8ee93b67 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_SLD 18 /* Restore split lock detection on context switch */ +#define TIF_NOTIFY_SIGNAL 19 /* signal notifications exist */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -121,6 +122,7 @@ struct thread_info { #define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_SLD (1 << TIF_SLD) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) From d6b63ac444e0227df0c07a968ba55f39be71615e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 12 Oct 2020 07:15:37 -0600 Subject: [PATCH 1390/1477] UPSTREAM: arc: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 53855e12588743ea128ee31f913d1c6e2f1d32c8 ] Wire up TIF_NOTIFY_SIGNAL handling for arc.
Cc: linux-snps-arc@lists.infradead.org Acked-by: Vineet Gupta Change-Id: I958240d0f80efee85940de4f5518135267a4f3a1 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2dbb0354517b1ebef40890d05ce8362e99b86179) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/thread_info.h | 4 +++- arch/arc/kernel/entry.S | 3 ++- arch/arc/kernel/signal.c | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index f9eef0e8f0b7..c0942c24d401 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -79,6 +79,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_SYSCALL_TRACE 15 /* syscall trace active */ /* true if poll_idle() is polling TIF_NEED_RESCHED */ @@ -89,11 +90,12 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) #define _TIF_SIGPENDING (1< Date: Thu, 22 Oct 2020 20:09:23 -0600 Subject: [PATCH 1391/1477] UPSTREAM: arm64: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 192caabd4dd92c98d23ed4334d7596af05af2fb4 ] Wire up TIF_NOTIFY_SIGNAL handling for arm64. 
Cc: linux-arm-kernel@lists.infradead.org Acked-by: Will Deacon Acked-by: Catalin Marinas Change-Id: Ice69a6d4cd969c76423eab8674e08827815e072a Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 79a9991e87fe61ff2a8b5ac8c112b3ce3544cb53) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/thread_info.h | 5 ++++- arch/arm64/kernel/signal.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 1fbab854a51b..cdcf307764aa 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -68,6 +68,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ #define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ @@ -98,10 +99,12 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ - _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT) + _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT | \ + _TIF_NOTIFY_SIGNAL) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0dab5679a97d..b6fbbd527dd7 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -938,7 +938,7 @@ asmlinkage void 
do_notify_resume(struct pt_regs *regs, (void __user *)NULL, current); } - if (thread_flags & _TIF_SIGPENDING) + if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); if (thread_flags & _TIF_NOTIFY_RESUME) { From c5825095c48f976d6d7ba9c2e7c87dcc5eb9d13f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 15:10:55 -0600 Subject: [PATCH 1392/1477] UPSTREAM: m68k: add support for TIF_NOTIFY_SIGNAL [ Upstream commit e660653cd9f2df470d156c249631f68b9dee51ee ] Wire up TIF_NOTIFY_SIGNAL handling for m68k. Cc: linux-m68k@lists.linux-m68k.org Acked-by: Geert Uytterhoeven Change-Id: I28d2f9d79486d8728ff0127cd24a64dbb9b2de68 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit fe137f46d41f90e1a36134e7876db897a25b7926) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/m68k/include/asm/thread_info.h | 1 + arch/m68k/kernel/signal.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h index 3689c6718c88..15a757073fa5 100644 --- a/arch/m68k/include/asm/thread_info.h +++ b/arch/m68k/include/asm/thread_info.h @@ -60,6 +60,7 @@ static inline struct thread_info *current_thread_info(void) * bits 0-7 are tested at every exception exit * bits 8-15 are also tested at syscall exit */ +#define TIF_NOTIFY_SIGNAL 4 #define TIF_NOTIFY_RESUME 5 /* callback before returning to user */ #define TIF_SIGPENDING 6 /* signal pending */ #define TIF_NEED_RESCHED 7 /* rescheduling necessary */ diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index fd916844a683..5d12736b4b28 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -1129,7 +1129,8 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs) { - if (test_thread_flag(TIF_SIGPENDING)) + if (test_thread_flag(TIF_NOTIFY_SIGNAL) || + test_thread_flag(TIF_SIGPENDING)) do_signal(regs); if 
(test_thread_flag(TIF_NOTIFY_RESUME)) From 470c17bd71a8e3710cda154cce33ed134e3a58f2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 15:21:21 -0600 Subject: [PATCH 1393/1477] UPSTREAM: nios32: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 42020064274c235d720d9c4b7d9a678b133e59cf ] Wire up TIF_NOTIFY_SIGNAL handling for nios32. Cc: Ley Foon Tan Acked-by: Ley Foon Tan Change-Id: I11eed799c048247128e1acfb241f97cc3703d5b2 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit cf3c6486731979a78851428d9e45e6f943b5fa92) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/nios2/include/asm/thread_info.h | 2 ++ arch/nios2/kernel/signal.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/nios2/include/asm/thread_info.h b/arch/nios2/include/asm/thread_info.h index 7349a4fa635b..272d2c72a727 100644 --- a/arch/nios2/include/asm/thread_info.h +++ b/arch/nios2/include/asm/thread_info.h @@ -86,6 +86,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 4 /* is terminating due to OOM killer */ #define TIF_SECCOMP 5 /* secure computing */ #define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling @@ -97,6 +98,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index 916180e4a997..68d626c4f1ba 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -309,7 +309,8 @@ asmlinkage int 
do_notify_resume(struct pt_regs *regs) if (!user_mode(regs)) return 0; - if (test_thread_flag(TIF_SIGPENDING)) { + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) { int restart = do_signal(regs); if (unlikely(restart)) { From 98031aa870071835e76f97a6aa68f38cd912bb3f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 15:27:02 -0600 Subject: [PATCH 1394/1477] UPSTREAM: parisc: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 18cb3281285d2190c0605d2e53543802319bd1a1 ] Wire up TIF_NOTIFY_SIGNAL handling for parisc. Cc: linux-parisc@vger.kernel.org Acked-by: Helge Deller Change-Id: Iaa3f0b072f528bf500dc9a10901b5791eb38623f Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 45b365bc6c1b57da984c70cae0c6c783ec094399) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/thread_info.h | 4 +++- arch/parisc/kernel/signal.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 285757544cca..0bd38a972cea 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -52,6 +52,7 @@ struct thread_info { #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ +#define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ #define TIF_SINGLESTEP 9 /* single stepping? 
*/ @@ -61,6 +62,7 @@ struct thread_info { #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_32BIT (1 << TIF_32BIT) @@ -72,7 +74,7 @@ struct thread_info { #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ - _TIF_NEED_RESCHED) + _TIF_NEED_RESCHED | _TIF_NOTIFY_SIGNAL) #define _TIF_SYSCALL_TRACE_MASK (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \ _TIF_BLOCKSTEP | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 8d6c9b88eb3f..db1a47cf424d 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -609,7 +609,8 @@ do_signal(struct pt_regs *regs, long in_syscall) void do_notify_resume(struct pt_regs *regs, long in_syscall) { - if (test_thread_flag(TIF_SIGPENDING)) + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) do_signal(regs, in_syscall); if (test_thread_flag(TIF_NOTIFY_RESUME)) From b1f0e1159f9132c2c13f4b332717860a79ea952d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 22 Oct 2020 20:11:56 -0600 Subject: [PATCH 1395/1477] UPSTREAM: powerpc: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 900f0713fdd730fab0f0bfa4a8ca4db2a8985bbe ] Wire up TIF_NOTIFY_SIGNAL handling for powerpc. 
Cc: linuxppc-dev@lists.ozlabs.org Acked-by: Michael Ellerman Change-Id: I30a11c05453437854c2a64be214ae557f04d2cea Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit abab3d4444b5f4732d741f45feb954fabb78af7f) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/thread_info.h | 5 ++++- arch/powerpc/kernel/signal.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 6de3517bea94..ff31d2fa2140 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -96,6 +96,7 @@ void arch_setup_new_exec(void); #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_NOTIFY_SIGNAL 3 /* signal notifications exist */ #define TIF_SYSCALL_EMU 4 /* syscall emulation active */ #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ #define TIF_PATCH_PENDING 6 /* pending live patching update */ @@ -121,6 +122,7 @@ void arch_setup_new_exec(void); #define _TIF_SYSCALL_TRACE (1<thread.regs); do_signal(current); } From 8489c863443a70578bccf66e10265da7a22ed8c8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 15:16:02 -0600 Subject: [PATCH 1396/1477] UPSTREAM: mips: add support for TIF_NOTIFY_SIGNAL [ Upstream commit f45c184bce15f4a314c0210519bc3b4aab408838 ] Wire up TIF_NOTIFY_SIGNAL handling for mips. 
Cc: linux-mips@vger.kernel.org Acked-By: Thomas Bogendoerfer Change-Id: Ie72b5e65361dee9ab5c8059bde16a3dd0181ca07 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 8ca2e5709922dc8c68323fdeeee099bb89a7c080) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/thread_info.h | 4 +++- arch/mips/kernel/signal.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index ee26f9a4575d..e2c352da3877 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -115,6 +115,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SECCOMP 4 /* secure computing */ #define TIF_NOTIFY_RESUME 5 /* callback before returning to user */ #define TIF_UPROBE 6 /* breakpointed or singlestepping */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -139,6 +140,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SECCOMP (1< Date: Fri, 9 Oct 2020 15:34:12 -0600 Subject: [PATCH 1397/1477] UPSTREAM: s390: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 75309018a24ddfb930c51bad8f4070b9bc2c923b ] Wire up TIF_NOTIFY_SIGNAL handling for s390. 
Cc: linux-s390@vger.kernel.org Acked-by: Heiko Carstens Acked-by: Sven Schnelle Change-Id: I070ab3e37c109b36a2a0134b5548f424f4877dd4 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0aef2ec0639459444f90fe59751ec74b60af4e62) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/thread_info.h | 2 ++ arch/s390/kernel/entry.S | 11 ++++++----- arch/s390/kernel/signal.c | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 13a04fcf7762..0045341ade48 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -65,6 +65,7 @@ void arch_setup_new_exec(void); #define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ #define TIF_PGSTE 6 /* New mm's will use 4K page tables */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_ISOLATE_BP 8 /* Run process with isolated BP */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ @@ -82,6 +83,7 @@ void arch_setup_new_exec(void); #define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */ #define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME) +#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) #define _TIF_SIGPENDING BIT(TIF_SIGPENDING) #define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED) #define _TIF_UPROBE BIT(TIF_UPROBE) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 963e8cb936e2..88ecbcf097a3 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -52,7 +52,8 @@ STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING) + _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING | \ + _TIF_NOTIFY_SIGNAL) _TIF_TRACE = 
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_ASCE_PRIMARY | _CIF_ASCE_SECONDARY | _CIF_FPU) @@ -481,8 +482,8 @@ ENTRY(system_call) #endif TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART jo .Lsysc_syscall_restart - TSTMSK __TI_flags(%r12),_TIF_SIGPENDING - jo .Lsysc_sigpending + TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) + jnz .Lsysc_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lsysc_notify_resume TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) @@ -863,8 +864,8 @@ ENTRY(io_int_handler) TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING jo .Lio_patch_pending #endif - TSTMSK __TI_flags(%r12),_TIF_SIGPENDING - jo .Lio_sigpending + TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) + jnz .Lio_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lio_notify_resume TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 9e900a8977bd..b27b6c1f058d 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -472,7 +472,7 @@ void do_signal(struct pt_regs *regs) current->thread.system_call = test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; - if (get_signal(&ksig)) { + if (test_thread_flag(TIF_SIGPENDING) && get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ if (current->thread.system_call) { regs->int_code = current->thread.system_call; From 85483753543280bde0658177e8a72f93fb5a7a54 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 15:47:28 -0600 Subject: [PATCH 1398/1477] UPSTREAM: um: add support for TIF_NOTIFY_SIGNAL [ Upstream commit a5b3cd32ff238b87e94d47b927aff117e22d13c0 ] Wire up TIF_NOTIFY_SIGNAL handling for um. 
Cc: linux-um@lists.infradead.org Acked-By: Anton Ivanov Change-Id: Idc702390f1ba7575230aa4c81d27f80088b17afc Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit dc808ffd9778aa013c792bb2ed369b84e346b67f) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/um/include/asm/thread_info.h | 2 ++ arch/um/kernel/process.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 66ab6a07330b..e610e932cfe1 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -57,6 +57,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_NOTIFY_SIGNAL 3 /* signal notifications exist */ #define TIF_RESTART_BLOCK 4 #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ #define TIF_SYSCALL_AUDIT 6 @@ -68,6 +69,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 8eb8b736abc1..067fb85fbf11 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -99,7 +99,8 @@ void interrupt_end(void) if (need_resched()) schedule(); - if (test_thread_flag(TIF_SIGPENDING)) + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) do_signal(regs); if (test_thread_flag(TIF_NOTIFY_RESUME)) tracehook_notify_resume(regs); From e2e4fbbceb01ae4c6682d974c51694173cc3910b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 
15:36:35 -0600 Subject: [PATCH 1399/1477] UPSTREAM: sh: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 6d3a273355e3c8471ddf9e8ce9a7cc4472bf1ccc ] Wire up TIF_NOTIFY_SIGNAL handling for sh. Cc: linux-sh@vger.kernel.org Change-Id: If469e4a57e80bb31d749ca571d4039fc1680732e Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3fde31e9626247e189dca71fe4723b8c722a676c) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/sh/include/asm/thread_info.h | 4 +++- arch/sh/kernel/signal_32.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 243ea5150aa0..598d0184ffea 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -105,6 +105,7 @@ extern void init_thread_xstate(void); #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_NOTIFY_SIGNAL 3 /* signal notifications exist */ #define TIF_SINGLESTEP 4 /* singlestepping active */ #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_SECCOMP 6 /* secure computing */ @@ -116,6 +117,7 @@ extern void init_thread_xstate(void); #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -132,7 +134,7 @@ extern void init_thread_xstate(void); #define _TIF_ALLWORK_MASK (_TIF_SYSCALL_TRACE | _TIF_SIGPENDING | \ _TIF_NEED_RESCHED | _TIF_SYSCALL_AUDIT | \ _TIF_SINGLESTEP | _TIF_NOTIFY_RESUME | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_NOTIFY_SIGNAL) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK (_TIF_ALLWORK_MASK & 
~(_TIF_SYSCALL_TRACE | \ diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 1add47fd31f6..dd3092911efa 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -499,7 +499,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0, unsigned long thread_info_flags) { /* deal with pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs, save_r0); if (thread_info_flags & _TIF_NOTIFY_RESUME) From 2b94543d45731098fb2a1fa68f6120d54d20945b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 15:24:46 -0600 Subject: [PATCH 1400/1477] UPSTREAM: openrisc: add support for TIF_NOTIFY_SIGNAL [ Upstream commit e181c0aa2e532af2b17128fbde699f8578cc0562 ] Wire up TIF_NOTIFY_SIGNAL handling for openrisc. Cc: openrisc@lists.librecores.org Acked-by: Stafford Horne Change-Id: I664b804e2b20332f1c529bca7e083c62e95e6d9f Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 12284aec880fc9f06d98d2fee28982c030cf99cf) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/openrisc/include/asm/thread_info.h | 2 ++ arch/openrisc/kernel/signal.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h index 9afe68bc423b..4f9d2a261455 100644 --- a/arch/openrisc/include/asm/thread_info.h +++ b/arch/openrisc/include/asm/thread_info.h @@ -98,6 +98,7 @@ register struct thread_info *current_thread_info_reg asm("r10"); #define TIF_SINGLESTEP 4 /* restore singlestep on return to user * mode */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_SYSCALL_TRACEPOINT 8 /* for ftrace syscall instrumentation */ #define TIF_RESTORE_SIGMASK 9 #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling * TIF_NEED_RESCHED @@ -109,6 +110,7 @@ register struct thread_info *current_thread_info_reg 
asm("r10"); #define _TIF_SIGPENDING (1< Date: Fri, 9 Oct 2020 14:39:00 -0600 Subject: [PATCH 1401/1477] UPSTREAM: csky: add support for TIF_NOTIFY_SIGNAL [ Upstream commit f3924d763c8af4c5d441b666c01f4de03ac9449e ] Wire up TIF_NOTIFY_SIGNAL handling for csky. Cc: linux-csky@vger.kernel.org Acked-by: Guo Ren Change-Id: I5aeedb22a46f89abfd86283dd1067c8ad9a5ee4a Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c2037d61dec2b07add549cb7e762702076279b2b) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/csky/include/asm/thread_info.h | 5 ++++- arch/csky/kernel/signal.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/csky/include/asm/thread_info.h b/arch/csky/include/asm/thread_info.h index 68e7a1227170..21456a3737c2 100644 --- a/arch/csky/include/asm/thread_info.h +++ b/arch/csky/include/asm/thread_info.h @@ -64,6 +64,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ #define TIF_SYSCALL_TRACEPOINT 5 /* syscall tracepoint instrumentation */ #define TIF_SYSCALL_AUDIT 6 /* syscall auditing */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_POLLING_NRFLAG 16 /* poll_idle() is TIF_NEED_RESCHED */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */ @@ -75,6 +76,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_MEMDIE (1 << TIF_MEMDIE) @@ -82,7 +84,8 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | 
_TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_UPROBE) + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NOTIFY_SIGNAL) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 243228b0aa07..f7c1677e5971 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -261,7 +261,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, uprobe_notify_resume(regs); /* Handle pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) { From 8c81f539a05ceed26c81b6107651e7146615e455 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 14:45:22 -0600 Subject: [PATCH 1402/1477] UPSTREAM: hexagon: add support for TIF_NOTIFY_SIGNAL [ Upstream commit aeec8193578a71d0aee21218351849d38121ce90 ] Wire up TIF_NOTIFY_SIGNAL handling for hexagon. 
Cc: linux-hexagon@vger.kernel.org Acked-by: Brian Cain Change-Id: I68ddb4388560d3caa828b12912da7d54ff42df10 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 19f3e328b4c6202198f27a3b899811d4a118333f) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/hexagon/include/asm/thread_info.h | 2 ++ arch/hexagon/kernel/process.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/hexagon/include/asm/thread_info.h b/arch/hexagon/include/asm/thread_info.h index 563da1986464..535976665bf0 100644 --- a/arch/hexagon/include/asm/thread_info.h +++ b/arch/hexagon/include/asm/thread_info.h @@ -95,6 +95,7 @@ register struct thread_info *__current_thread_info asm(QUOTED_THREADINFO_REG); #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* restore ss @ return to usr mode */ #define TIF_RESTORE_SIGMASK 6 /* restore sig mask in do_signal() */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 /* OOM killer killed process */ @@ -103,6 +104,7 @@ register struct thread_info *__current_thread_info asm(QUOTED_THREADINFO_REG); #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) /* work to do on interrupt/exception return - All but TIF_SYSCALL_TRACE */ #define _TIF_WORK_MASK (0x0000FFFF & ~_TIF_SYSCALL_TRACE) diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 67767c5ed98c..6a980cba7b29 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -174,7 +174,7 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags) return 1; } - if (thread_info_flags & _TIF_SIGPENDING) { + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { do_signal(regs); return 1; } From 6c3e852b4ff698740ad188cb997bf4ccbf07f48a 
Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 15:13:57 -0600 Subject: [PATCH 1403/1477] UPSTREAM: microblaze: add support for TIF_NOTIFY_SIGNAL [ Upstream commit f4ea089e429e0d366cd1a34a2cbe3c7b13d98d75 ] Wire up TIF_NOTIFY_SIGNAL handling for microblaze. Acked-by: Michal Simek Change-Id: I6540230d4b04069323371351dde3d9c964b80058 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 02d383a59cbf8b7292947895604a2f9103f4f862) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/microblaze/include/asm/thread_info.h | 2 ++ arch/microblaze/kernel/signal.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index ad8e8fcb90d3..44f5ca331862 100644 --- a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -107,6 +107,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ /* restore singlestep on return to user mode */ #define TIF_SINGLESTEP 4 +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_MEMDIE 6 /* is terminating due to OOM killer */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ #define TIF_SECCOMP 10 /* secure computing */ @@ -119,6 +120,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index f11a0ccccabc..5a8d173d7b75 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -313,7 +313,8 @@ static void 
do_signal(struct pt_regs *regs, int in_syscall) asmlinkage void do_notify_resume(struct pt_regs *regs, int in_syscall) { - if (test_thread_flag(TIF_SIGPENDING)) + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) do_signal(regs, in_syscall); if (test_thread_flag(TIF_NOTIFY_RESUME)) From 29420dc96b8244e6870618bccbfb738078c409ad Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 16:00:49 -0600 Subject: [PATCH 1404/1477] UPSTREAM: arm: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 32d59773da38cd83e497a70eb9754d4bbae3aeae ] Wire up TIF_NOTIFY_SIGNAL handling for arm. Cc: linux-arm-kernel@lists.infradead.org Acked-by: Russell King Change-Id: Ic1385829b4c4b28cb4b1f82595fd0b2acb9ec9eb Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1bee9dbbcabbb77617fb257f964628b50ba2529c) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/thread_info.h | 7 ++++++- arch/arm/kernel/entry-common.S | 6 +++--- arch/arm/kernel/entry-v7m.S | 2 +- arch/arm/kernel/signal.c | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 536b6b979f63..eb7ce2747eb0 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -126,6 +126,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, * thread information flags: * TIF_USEDFPU - FPU was used by this task this quantum (SMP) * TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED + * + * Any bit in the range of 0..15 will cause do_work_pending() to be invoked. 
*/ #define TIF_SIGPENDING 0 /* signal pending */ #define TIF_NEED_RESCHED 1 /* rescheduling necessary */ @@ -135,6 +137,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SECCOMP 7 /* seccomp syscall filtering active */ +#define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -148,6 +151,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) /* Checks for any syscall work in entry-common.S */ @@ -158,7 +162,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, * Change these and you break ASM code in entry-common.S */ #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_UPROBE) + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NOTIFY_SIGNAL) #endif /* __KERNEL__ */ #endif /* __ASM_ARM_THREAD_INFO_H */ diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index bd619da73c84..9b3c737575e9 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -53,7 +53,7 @@ __ret_fast_syscall: cmp r2, #TASK_SIZE blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + movs r1, r1, lsl #16 bne fast_work_pending @@ -90,7 +90,7 @@ __ret_fast_syscall: cmp r2, #TASK_SIZE blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + movs r1, r1, lsl #16 beq no_work_pending UNWIND(.fnend ) ENDPROC(ret_fast_syscall) @@ -131,7 +131,7 @@ 
ENTRY(ret_to_user_from_irq) cmp r2, #TASK_SIZE blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] - tst r1, #_TIF_WORK_MASK + movs r1, r1, lsl #16 bne slow_work_pending no_work_pending: asm_trace_hardirqs_on save = 0 diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index de1f20624be1..d0e898608d30 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -59,7 +59,7 @@ __irq_entry: get_thread_info tsk ldr r2, [tsk, #TI_FLAGS] - tst r2, #_TIF_WORK_MASK + movs r2, r2, lsl #16 beq 2f @ no work pending mov r0, #V7M_SCB_ICSR_PENDSVSET str r0, [r1, V7M_SCB_ICSR] @ raise PendSV diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 2f81d3af5f9a..a3a38d0a4c85 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -655,7 +655,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) if (unlikely(!user_mode(regs))) return 0; local_irq_enable(); - if (thread_flags & _TIF_SIGPENDING) { + if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { int restart = do_signal(regs, syscall); if (unlikely(restart)) { /* From 01af0730c9c6f308ad39e317dad12a9196d79758 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 15:56:07 -0600 Subject: [PATCH 1405/1477] UPSTREAM: xtensa: add support for TIF_NOTIFY_SIGNAL [ Upstream commit bec58f40d6c5372d812c93cc3947f3bc97440e57 ] Wire up TIF_NOTIFY_SIGNAL handling for xtensa. Thanks to Max Filippov for making the asm correct. 
Cc: linux-xtensa@linux-xtensa.org Change-Id: I246a2ab88d6fe797d5e067d3b6515cf787ba19cf Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit bf0b619593cd5560c55b10dbf290e6e148d1596c) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/asm/thread_info.h | 5 ++++- arch/xtensa/kernel/entry.S | 4 ++-- arch/xtensa/kernel/signal.c | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index 6acbbe0d87d3..a312333a9add 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -111,18 +111,21 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ #define TIF_SINGLESTEP 3 /* restore singlestep on return to user mode */ #define TIF_SYSCALL_TRACEPOINT 4 /* syscall tracepoint instrumentation */ -#define TIF_MEMDIE 5 /* is terminating due to OOM killer */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_RESTORE_SIGMASK 6 /* restore signal mask in do_signal() */ #define TIF_NOTIFY_RESUME 7 /* callback before returning to user */ #define TIF_DB_DISABLED 8 /* debug trap disabled for syscall */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ #define TIF_SECCOMP 10 /* secure computing */ +#define TIF_MEMDIE 11 /* is terminating due to OOM killer */ #define _TIF_SYSCALL_TRACE (1< Date: Thu, 8 Oct 2020 09:11:42 -0600 Subject: [PATCH 1406/1477] UPSTREAM: alpha: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 5a9a8897c253a075805401d38d987ec1ac1824b6 ] Wire up TIF_NOTIFY_SIGNAL handling for alpha. 
Cc: linux-alpha@vger.kernel.org Change-Id: I44b3be73e9a332bfc92b8f5c34426770c9642dae Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 30b78a17ac8f21751426edbe72fabfc1d825e9a0) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/alpha/include/asm/thread_info.h | 2 ++ arch/alpha/kernel/entry.S | 2 +- arch/alpha/kernel/signal.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 807d7b9a1860..2592356e3215 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -62,6 +62,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SYSCALL_AUDIT 4 /* syscall audit active */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */ #define TIF_MEMDIE 13 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 14 /* idle is polling for TIF_NEED_RESCHED */ @@ -71,6 +72,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define _TIF_NEED_RESCHED (1< Date: Fri, 9 Oct 2020 14:35:34 -0600 Subject: [PATCH 1407/1477] UPSTREAM: c6x: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 6d665a4d8b4264def0fbb72da3a500d9904ffe3e ] Wire up TIF_NOTIFY_SIGNAL handling for c6x. 
Cc: linux-c6x-dev@linux-c6x.org Change-Id: I0e38ee478eff25064bb03277a29ba273b0656b28 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c82617d9decc3c5af2ab2c66055701f7fbd944f6) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/c6x/include/asm/thread_info.h | 1 + arch/c6x/kernel/asm-offsets.c | 1 + arch/c6x/kernel/signal.c | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/c6x/include/asm/thread_info.h b/arch/c6x/include/asm/thread_info.h index f70382844b96..dd8913d57189 100644 --- a/arch/c6x/include/asm/thread_info.h +++ b/arch/c6x/include/asm/thread_info.h @@ -82,6 +82,7 @@ struct thread_info *current_thread_info(void) #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_MEMDIE 17 /* OOM killer killed process */ diff --git a/arch/c6x/kernel/asm-offsets.c b/arch/c6x/kernel/asm-offsets.c index 0f8fde494875..4a264ef87dcb 100644 --- a/arch/c6x/kernel/asm-offsets.c +++ b/arch/c6x/kernel/asm-offsets.c @@ -116,6 +116,7 @@ void foo(void) DEFINE(_TIF_NOTIFY_RESUME, (1< #include +#include #include #include @@ -313,7 +314,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags, int syscall) { /* deal with pending signal delivery */ - if (thread_info_flags & (1 << TIF_SIGPENDING)) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs, syscall); if (thread_info_flags & (1 << TIF_NOTIFY_RESUME)) From 1dcd12493bf34eee5ebe6233bc5c12ee59397068 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 14:42:33 -0600 Subject: [PATCH 1408/1477] UPSTREAM: h8300: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 2f9799ad0111ee742ccc02dd2ea2c87646746fc1 ] Wire up TIF_NOTIFY_SIGNAL handling for h8300. 
Cc: uclinux-h8-devel@lists.sourceforge.jp Change-Id: Ibd30f09fc25229b5a410ec37485aac0a188ff717 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 48e9e35d33d66519d12deea401487ea705deb596) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/h8300/include/asm/thread_info.h | 4 +++- arch/h8300/kernel/signal.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index 0cdaa302d3d2..a518214d4ddd 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -73,6 +73,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SYSCALL_TRACEPOINT 8 /* for ftrace syscall instrumentation */ #define TIF_POLLING_NRFLAG 9 /* true if poll_idle() is polling TIF_NEED_RESCHED */ +#define TIF_NOTIFY_SIGNAL 10 /* signal notifications exist */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) @@ -83,6 +84,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) /* work to do in syscall trace */ #define _TIF_WORK_SYSCALL_MASK (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \ @@ -92,7 +94,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_ALLWORK_MASK (_TIF_SYSCALL_TRACE | _TIF_SIGPENDING | \ _TIF_NEED_RESCHED | _TIF_SYSCALL_AUDIT | \ _TIF_SINGLESTEP | _TIF_NOTIFY_RESUME | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_NOTIFY_SIGNAL) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \ diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index 75d9b7e626b2..75a1c36b105a 100644 
--- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -279,7 +279,7 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags) { - if (thread_info_flags & _TIF_SIGPENDING) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) From 6eaa6653e433069fad85c7871a5a963c39e5c5df Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 14:49:43 -0600 Subject: [PATCH 1409/1477] UPSTREAM: ia64: add support for TIF_NOTIFY_SIGNAL [ Upstream commit b269c229b0e89aedb7943c06673b56b6052cf5e5 ] Wire up TIF_NOTIFY_SIGNAL handling for ia64. Cc: linux-ia64@vger.kernel.org [axboe: added fixes from Mike Rapoport ] Change-Id: Ib1e6bb561f1a6ddf9be818d35e295b53f0d1f1fd Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 751fedb9ba5d4af62196d9b3014d4e62945c6e3f) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/ia64/include/asm/thread_info.h | 4 +++- arch/ia64/kernel/process.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 64a1011f6812..51d20cb37706 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -103,6 +103,7 @@ struct thread_info { #define TIF_SYSCALL_TRACE 2 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notification exist */ #define TIF_NOTIFY_RESUME 6 /* resumption notification requested */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ @@ -115,6 +116,7 @@ struct thread_info { #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP) 
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_MCA_INIT (1 << TIF_MCA_INIT) @@ -124,7 +126,7 @@ struct thread_info { /* "work to do on user-return" bits */ #define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\ - _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE) + _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_NOTIFY_SIGNAL) /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index c9ff8796b509..70649a5b3e88 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -171,7 +171,8 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) } /* deal with pending signal delivery */ - if (test_thread_flag(TIF_SIGPENDING)) { + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) { local_irq_enable(); /* force interrupt enable */ ia64_do_signal(scr, in_syscall); } From 52a756bf17c91e6e966fba6e1631f859d60c2215 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 15:18:43 -0600 Subject: [PATCH 1410/1477] UPSTREAM: nds32: add support for TIF_NOTIFY_SIGNAL [ Upstream commit b13e8bf615fe26fb6a6dfe1b5a1c65e1624dfee2 ] Wire up TIF_NOTIFY_SIGNAL handling for nds32. 
Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Change-Id: Ic184206ffc5c6981fb590eb0103bc2f623f5402d Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 57e833a0a03de1b1d1cfe0db2f9620b50df47502) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/nds32/include/asm/thread_info.h | 2 ++ arch/nds32/kernel/ex-exit.S | 2 +- arch/nds32/kernel/signal.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/nds32/include/asm/thread_info.h b/arch/nds32/include/asm/thread_info.h index c135111ec44e..d3967ad184f0 100644 --- a/arch/nds32/include/asm/thread_info.h +++ b/arch/nds32/include/asm/thread_info.h @@ -48,6 +48,7 @@ struct thread_info { #define TIF_NEED_RESCHED 2 #define TIF_SINGLESTEP 3 #define TIF_NOTIFY_RESUME 4 /* callback before returning to user */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_SYSCALL_TRACE 8 #define TIF_POLLING_NRFLAG 17 #define TIF_MEMDIE 18 @@ -57,6 +58,7 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S index 6a2966c2d8c8..b30699911b81 100644 --- a/arch/nds32/kernel/ex-exit.S +++ b/arch/nds32/kernel/ex-exit.S @@ -120,7 +120,7 @@ work_pending: andi $p1, $r1, #_TIF_NEED_RESCHED bnez $p1, work_resched - andi $p1, $r1, #_TIF_SIGPENDING|#_TIF_NOTIFY_RESUME + andi $p1, $r1, #_TIF_SIGPENDING|#_TIF_NOTIFY_RESUME|#_TIF_NOTIFY_SIGNAL beqz $p1, no_work_pending move $r0, $sp ! 
'regs' diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c index 2acb94812af9..7e3ca430a223 100644 --- a/arch/nds32/kernel/signal.c +++ b/arch/nds32/kernel/signal.c @@ -376,7 +376,7 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int thread_flags) { - if (thread_flags & _TIF_SIGPENDING) + if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); if (thread_flags & _TIF_NOTIFY_RESUME) From c9c70c8cb62003388327108abd4ed0261aa75673 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 14:29:17 -0600 Subject: [PATCH 1411/1477] UPSTREAM: riscv: add support for TIF_NOTIFY_SIGNAL [ Upstream commit 24a31b81e38309b1604f24520110aae1f83f3cbf ] Wire up TIF_NOTIFY_SIGNAL handling for riscv. Cc: linux-riscv@lists.infradead.org Change-Id: Ia1d9bc9779bf2935dc5a7c83e86261123d4d6b0f Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 78a53ff02656da3c1e6a3e11e30ab23cf4bcb0f7) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/thread_info.h | 5 ++++- arch/riscv/kernel/signal.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index d79ae9d98999..c55e0a1f07a0 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -80,6 +80,7 @@ struct thread_info { #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ #define TIF_SECCOMP 8 /* syscall secure computing */ +#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -88,9 +89,11 @@ struct thread_info { #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define 
_TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_WORK_MASK \ - (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) + (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \ + _TIF_NOTIFY_SIGNAL) #define _TIF_SYSCALL_WORK \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \ diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 529c123cf0a4..50a8225c58bc 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -312,7 +312,7 @@ asmlinkage __visible void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { /* Handle pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) From 7140fddd84ff74583af0582b1c110ca734adcbac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 15:44:37 -0600 Subject: [PATCH 1412/1477] UPSTREAM: sparc: add support for TIF_NOTIFY_SIGNAL [ Upstream commit f50a7052f5e70ee7a6a5e2ed08660994dc3df2a5 ] Wire up TIF_NOTIFY_SIGNAL handling for sparc. 
Cc: sparclinux@vger.kernel.org Change-Id: I6dc91c146efed1287ead67fa44db9185576cbce8 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit e1402ba4df2025d66b81a8c215e452b95d1796d8) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/thread_info_32.h | 4 +++- arch/sparc/include/asm/thread_info_64.h | 6 ++++-- arch/sparc/kernel/signal_32.c | 2 +- arch/sparc/kernel/signal_64.c | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 548b366165dd..45b4955b253f 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -104,6 +104,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_USEDFPU 8 /* FPU was used by this task * this quantum (SMP) */ #define TIF_POLLING_NRFLAG 9 /* true if poll_idle() is polling @@ -115,11 +116,12 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define _TIF_NOTIFY_RESUME (1< Date: Tue, 2 Mar 2021 17:22:11 -0700 Subject: [PATCH 1413/1477] UPSTREAM: ia64: don't call handle_signal() unless there's actually a signal queued [ Upstream commit f5f4fc4649ae542b1a25670b17aaf3cbb6187acc ] Sergei and John both reported that ia64 failed to boot in 5.11, and it was related to signals. Turns out the ia64 signal handling is a bit odd, it doesn't check the return value of get_signal() for whether there's a signal to deliver or not. With the introduction of TIF_NOTIFY_SIGNAL, then task_work could trigger it. Fix it by only calling handle_signal() if we actually have a real signal to deliver. This brings it in line with all other archs, too. 
Fixes: b269c229b0e8 ("ia64: add support for TIF_NOTIFY_SIGNAL") Reported-by: Sergei Trofimovich Reported-by: John Paul Adrian Glaubitz Tested-by: Sergei Trofimovich Change-Id: Id225cfc12645aa47e37c3107670ba440116c9b04 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a1240cc413ebb3ec15ee156d3f18f2d5bf1dfe1f) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/ia64/kernel/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index e67b22fc3c60..c1b299760bf7 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -341,7 +341,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) * need to push through a forced SIGSEGV. */ while (1) { - get_signal(&ksig); + if (!get_signal(&ksig)) + break; /* * get_signal() may have run a debugger (via notify_parent()) From ad4ba3038aa424af79a640172bae309015292041 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 6 Jan 2021 12:34:36 -0800 Subject: [PATCH 1414/1477] UPSTREAM: ARC: unbork 5.11 bootup: fix snafu in _TIF_NOTIFY_SIGNAL handling [ Upstream commit bb12433bf56e76789c6b08b36c546f745a6aa6e1 ] Linux 5.11.rcX was failing to boot on ARC HSDK board. Turns out we have a couple of issues, this being the first one, and I'm to blame as I didn't pay attention during review. TIF_NOTIFY_SIGNAL support requires checking multiple TIF_* bits in kernel return code path. Old code only needed to check a single bit so BBIT0 worked. New code needs to check multiple bits so AND instruction. 
So needs to use bit mask variant _TIF_SIGPENDING Cc: Jens Axboe Fixes: 53855e12588743ea128 ("arc: add support for TIF_NOTIFY_SIGNAL") Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/34 Change-Id: I00aa9a6c3118f72e90575fb7ca4ebc08300b542e Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman (cherry picked from commit db911277a2b38b4ef98c7192728d5f4cba7863fe) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 676381f694d3..6ee9cb559808 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -308,7 +308,7 @@ resume_user_mode_begin: mov r0, sp ; pt_regs for arg to do_signal()/do_notify_resume() GET_CURR_THR_INFO_FLAGS r9 - and.f 0, r9, TIF_SIGPENDING|TIF_NOTIFY_SIGNAL + and.f 0, r9, _TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL bz .Lchk_notify_resume ; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs From 198756681580197f7a3d9028dbba1518c69d1fd3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Sep 2021 18:08:56 -0400 Subject: [PATCH 1415/1477] UPSTREAM: alpha: fix TIF_NOTIFY_SIGNAL handling [ Upstream commit e2c7554cc6d85f95e3c6635f270ec839ab9fe05e ] it needs to be added to _TIF_WORK_MASK, or we might not reach do_work_pending() in the first place... 
Fixes: 5a9a8897c253a "alpha: add support for TIF_NOTIFY_SIGNAL" Change-Id: I9f19ad6bc9833ce0cc103163af7904862425badb Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 6e2bce21acb4f0c35521e68d74e4317fd6afd97d) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/alpha/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 2592356e3215..0ce1eee0924b 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -77,7 +77,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); /* Work to do on interrupt/exception return. */ #define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ - _TIF_NOTIFY_RESUME) + _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL) /* Work to do on any return to userspace. */ #define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \ From 8492c5dd3bec227cf0a27d71381999ba87e95a80 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 16:01:33 -0600 Subject: [PATCH 1416/1477] UPSTREAM: task_work: remove legacy TWA_SIGNAL path [ Upstream commit 03941ccfda161c2680147fa5ab92aead2a79cac1 ] All archs now support TIF_NOTIFY_SIGNAL. Change-Id: I3fc938645ac7be18300432713909ccfaa7cd2711 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 61bdeb142e8f7550826373295cb35b3571d70f62) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- kernel/task_work.c | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/kernel/task_work.c b/kernel/task_work.c index 2c47ce899ee8..3d4852891fa8 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -5,34 +5,6 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ -/* - * TWA_SIGNAL signaling - use TIF_NOTIFY_SIGNAL, if available, as it's faster - * than TIF_SIGPENDING as there's no dependency on ->sighand. 
The latter is - * shared for threads, and can cause contention on sighand->lock. Even for - * the non-threaded case TIF_NOTIFY_SIGNAL is more efficient, as no locking - * or IRQ disabling is involved for notification (or running) purposes. - */ -static void task_work_notify_signal(struct task_struct *task) -{ -#if defined(TIF_NOTIFY_SIGNAL) - set_notify_signal(task); -#else - unsigned long flags; - - /* - * Only grab the sighand lock if we don't already have some - * task_work pending. This pairs with the smp_store_mb() - * in get_signal(), see comment there. - */ - if (!(READ_ONCE(task->jobctl) & JOBCTL_TASK_WORK) && - lock_task_sighand(task, &flags)) { - task->jobctl |= JOBCTL_TASK_WORK; - signal_wake_up(task, 0); - unlock_task_sighand(task, &flags); - } -#endif -} - /** * task_work_add - ask the @task to execute @work->func() * @task: the task which should run the callback @@ -79,7 +51,7 @@ int task_work_add(struct task_struct *task, struct callback_head *work, set_notify_resume(task); break; case TWA_SIGNAL: - task_work_notify_signal(task); + set_notify_signal(task); break; default: WARN_ON_ONCE(1); From 723de95c0cd013296b29318e7f677e50c94880a5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 16:04:39 -0600 Subject: [PATCH 1417/1477] UPSTREAM: kernel: remove checking for TIF_NOTIFY_SIGNAL [ Upstream commit e296dc4996b8094ccde45d19090d804c4103513e ] It's available everywhere now, no need to check or add dummy defines. 
Change-Id: I2e0950c13a90b463b848bb6bc095db02ae08a8cd Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 90a2c3821bbfe8435bde901953871576a1bf8c6d) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- include/linux/entry-common.h | 4 ---- include/linux/sched/signal.h | 2 -- include/linux/tracehook.h | 4 ---- kernel/signal.c | 2 -- 4 files changed, 12 deletions(-) diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 1a128baf3628..46c42479f950 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -37,10 +37,6 @@ # define _TIF_UPROBE (0) #endif -#ifndef _TIF_NOTIFY_SIGNAL -# define _TIF_NOTIFY_SIGNAL (0) -#endif - /* * TIF flags handled in syscall_enter_from_user_mode() */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0ca6f248dc4f..fb0b3899a86f 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -367,7 +367,6 @@ static inline int task_sigpending(struct task_struct *p) static inline int signal_pending(struct task_struct *p) { -#if defined(TIF_NOTIFY_SIGNAL) /* * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same * behavior in terms of ensuring that we break out of wait loops @@ -375,7 +374,6 @@ static inline int signal_pending(struct task_struct *p) */ if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) return 1; -#endif return task_sigpending(p); } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index f7d82e4fafd6..ee9ab7dbc8c3 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -205,12 +205,10 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) */ static inline void tracehook_notify_signal(void) { -#if defined(TIF_NOTIFY_SIGNAL) clear_thread_flag(TIF_NOTIFY_SIGNAL); smp_mb__after_atomic(); if (current->task_works) task_work_run(); -#endif } /* @@ -218,11 +216,9 @@ static inline void tracehook_notify_signal(void) */ static inline void 
set_notify_signal(struct task_struct *task) { -#if defined(TIF_NOTIFY_SIGNAL) if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && !wake_up_state(task, TASK_INTERRUPTIBLE)) kick_process(task); -#endif } #endif /* */ diff --git a/kernel/signal.c b/kernel/signal.c index 6eb4b9da515d..bf54af58427e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2557,14 +2557,12 @@ bool get_signal(struct ksignal *ksig) * that the arch handlers don't all have to do it. If we get here * without TIF_SIGPENDING, just exit after running signal work. */ -#ifdef TIF_NOTIFY_SIGNAL if (!IS_ENABLED(CONFIG_GENERIC_ENTRY)) { if (test_thread_flag(TIF_NOTIFY_SIGNAL)) tracehook_notify_signal(); if (!task_sigpending(current)) return false; } -#endif if (unlikely(uprobe_deny_signal())) return false; From b25b8c55ba1747463638adddf9f3575b6b37f62a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 10 Jun 2021 15:11:11 -0500 Subject: [PATCH 1418/1477] UPSTREAM: coredump: Limit what can interrupt coredumps [ Upstream commit 06af8679449d4ed282df13191fc52d5ba28ec536 ] Olivier Langlois has been struggling with coredumps being incompletely written in processes using io_uring. Olivier Langlois writes: > io_uring is a big user of task_work and any event that io_uring made a > task waiting for that occurs during the core dump generation will > generate a TIF_NOTIFY_SIGNAL. > > Here are the detailed steps of the problem: > 1. io_uring calls vfs_poll() to install a task to a file wait queue > with io_async_wake() as the wakeup function cb from io_arm_poll_handler() > 2. wakeup function ends up calling task_work_add() with TWA_SIGNAL > 3. task_work_add() sets the TIF_NOTIFY_SIGNAL bit by calling > set_notify_signal() The coredump code deliberately supports being interrupted by SIGKILL, and depends upon prepare_signal to filter out all other signals. Now that signal_pending includes wake ups for TIF_NOTIFY_SIGNAL this hack in dump_emitted by the coredump code no longer works. 
Make the coredump code more robust by explicitly testing for all of the wakeup conditions the coredump code supports. This prevents new wakeup conditions from breaking the coredump code, as well as fixing the current issue. The filesystem code that the coredump code uses already limits itself to only aborting on fatal_signal_pending. So it should not develop surprising wake-up reasons either. v2: Don't remove the now unnecessary code in prepare_signal. Cc: stable@vger.kernel.org Fixes: 12db8b690010 ("entry: Add support for TIF_NOTIFY_SIGNAL") Reported-by: Olivier Langlois Change-Id: I84870bf0a620a97af50d9b495dd225f9ee2b66b8 Signed-off-by: "Eric W. Biederman" Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 4b4d2c79921a8327e9e853dc93c06b27edb3a859) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/coredump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/coredump.c b/fs/coredump.c index b1b74dbfa22a..7c5edadf5208 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -520,7 +520,7 @@ static bool dump_interrupted(void) * but then we need to teach dump_write() to restart and clear * TIF_SIGPENDING. */ - return signal_pending(current); + return fatal_signal_pending(current) || freezing(current); } static void wait_for_dump_helpers(struct file *file) From 6e4362caf9f2ecad6ba38d8dc2753c5a6e5f6141 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 22 Mar 2021 09:39:12 -0600 Subject: [PATCH 1419/1477] UPSTREAM: kernel: allow fork with TIF_NOTIFY_SIGNAL pending [ Upstream commit 66ae0d1e2d9fe6ec70e73fcfdcf4b390e271c1ac ] fork() fails if signal_pending() is true, but there are two conditions that can lead to that: 1) An actual signal is pending. We want fork to fail for that one, like we always have. 2) TIF_NOTIFY_SIGNAL is pending, because the task has pending task_work. We don't need to make it fail for that case. 
Allow fork() to proceed if just task_work is pending, by changing the signal_pending() check to task_sigpending(). Change-Id: Iec007746b42f5d62581a8b5f6cca4006e707b8e3 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0f735cf52bd0b2aaca865e3d2e3dc276479e41ba) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 563798b75acd..11445f72c1c0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1988,7 +1988,7 @@ static __latent_entropy struct task_struct *copy_process( recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); retval = -ERESTARTNOINTR; - if (signal_pending(current)) + if (task_sigpending(current)) goto fork_out; retval = -ENOMEM; From cf487d3c6af594a11b4968ad973b5a39b3181aac Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Wed, 4 May 2022 13:08:40 -0500 Subject: [PATCH 1420/1477] UPSTREAM: entry/kvm: Exit to user mode when TIF_NOTIFY_SIGNAL is set [ Upstream commit 3e684903a8574ffc9475fdf13c4780a7adb506ad ] A livepatch transition may stall indefinitely when a kvm vCPU is heavily loaded. To the host, the vCPU task is a user thread which is spending a very long time in the ioctl(KVM_RUN) syscall. During livepatch transition, set_notify_signal() will be called on such tasks to interrupt the syscall so that the task can be transitioned. This interrupts guest execution, but when xfer_to_guest_mode_work() sees that TIF_NOTIFY_SIGNAL is set but not TIF_SIGPENDING it concludes that an exit to user mode is unnecessary, and guest execution is resumed without transitioning the task for the livepatch. This handling of TIF_NOTIFY_SIGNAL is incorrect, as set_notify_signal() is expected to break tasks out of interruptible kernel loops and cause them to return to userspace.
Change xfer_to_guest_mode_work() to handle TIF_NOTIFY_SIGNAL the same as TIF_SIGPENDING, signaling to the vCPU run loop that an exit to userspace is needed. Any pending task_work will be run when get_signal() is called from exit_to_user_mode_loop(), so there is no longer any need to run task work from xfer_to_guest_mode_work(). Suggested-by: "Eric W. Biederman" Cc: Petr Mladek Change-Id: If14e86a516403671ccb122cea32cc704f774e8ce Signed-off-by: Seth Forshee Message-Id: <20220504180840.2907296-1-sforshee@digitalocean.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 000de389ad7b8094bcf714ee27e2362eb5054a1a) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- kernel/entry/kvm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 049fd06b4c3d..7b946847be78 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -8,10 +8,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) do { int ret; - if (ti_work & _TIF_NOTIFY_SIGNAL) - tracehook_notify_signal(); - - if (ti_work & _TIF_SIGPENDING) { + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { kvm_handle_signal_exit(vcpu); return -EINTR; } From 150dea15cb17798a8964efbcd90c9ee7e6111441 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 17 Feb 2021 08:48:00 -0700 Subject: [PATCH 1421/1477] UPSTREAM: arch: setup PF_IO_WORKER threads like PF_KTHREAD [ Upstream commit 4727dc20e0422211a0e0c72b1ace4ed6096df8a6 ] PF_IO_WORKER are kernel threads too, but they aren't PF_KTHREAD in the sense that we don't assign ->set_child_tid with our own structure. Just ensure that every arch sets up the PF_IO_WORKER threads like kthreads in the arch implementation of copy_thread().
Change-Id: Iec4a3c42a39f016b323476d7238f3d36aaf0e6cf Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 320c8057eceb18c5d836fcbe0ffb0035fcfe28ff) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/process.c | 2 +- arch/arc/kernel/process.c | 2 +- arch/arm/kernel/process.c | 2 +- arch/arm64/kernel/process.c | 2 +- arch/csky/kernel/process.c | 2 +- arch/h8300/kernel/process.c | 2 +- arch/hexagon/kernel/process.c | 2 +- arch/ia64/kernel/process.c | 2 +- arch/m68k/kernel/process.c | 2 +- arch/microblaze/kernel/process.c | 2 +- arch/mips/kernel/process.c | 2 +- arch/nds32/kernel/process.c | 2 +- arch/nios2/kernel/process.c | 2 +- arch/openrisc/kernel/process.c | 2 +- arch/riscv/kernel/process.c | 2 +- arch/s390/kernel/process.c | 2 +- arch/sh/kernel/process_32.c | 2 +- arch/sparc/kernel/process_32.c | 2 +- arch/sparc/kernel/process_64.c | 2 +- arch/um/kernel/process.c | 2 +- arch/x86/kernel/process.c | 2 +- arch/xtensa/kernel/process.c | 2 +- 22 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 4c7b0414a3ff..08335b2294b3 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -249,7 +249,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childti->pcb.ksp = (unsigned long) childstack; childti->pcb.flags = 1; /* set FEN, clear everything else */ - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* kernel thread */ memset(childstack, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs)); diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index a85e9c625ab5..8cf2caae93f1 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -191,7 +191,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childksp[0] = 0; /* fp */ childksp[1] = (unsigned long)ret_from_fork; /* blink */ - if (unlikely(p->flags & PF_KTHREAD)) 
{ + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { memset(c_regs, 0, sizeof(struct pt_regs)); c_callee->r13 = kthread_arg; diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 9f199b1e8383..2647e48c537e 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -243,7 +243,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, thread->cpu_domain = get_domain(); #endif - if (likely(!(p->flags & PF_KTHREAD))) { + if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) { *childregs = *current_pt_regs(); childregs->ARM_r0 = 0; if (stack_start) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index caa63b96d509..e36544e9fc6c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -394,7 +394,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, ptrauth_thread_init_kernel(p); - if (likely(!(p->flags & PF_KTHREAD))) { + if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) { *childregs = *current_pt_regs(); childregs->regs[0] = 0; diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index 69af6bc87e64..3d0ca22cd0e2 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -49,7 +49,7 @@ int copy_thread(unsigned long clone_flags, /* setup thread.sp for switch_to !!! 
*/ p->thread.sp = (unsigned long)childstack; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { memset(childregs, 0, sizeof(struct pt_regs)); childstack->r15 = (unsigned long) ret_from_kernel_thread; childstack->r10 = kthread_arg; diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index bc1364db58fe..46b1342ce515 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -112,7 +112,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs = (struct pt_regs *) (THREAD_SIZE + task_stack_page(p)) - 1; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { memset(childregs, 0, sizeof(struct pt_regs)); childregs->retpc = (unsigned long) ret_from_kernel_thread; childregs->er4 = topstk; /* arg */ diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 6a980cba7b29..c61165c99ae0 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -73,7 +73,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, sizeof(*ss)); ss->lr = (unsigned long)ret_from_fork; p->thread.switch_sp = ss; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { memset(childregs, 0, sizeof(struct pt_regs)); /* r24 <- fn, r25 <- arg */ ss->r24 = usp; diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 70649a5b3e88..8159b7af5509 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -338,7 +338,7 @@ copy_thread(unsigned long clone_flags, unsigned long user_stack_base, ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */ - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { if (unlikely(!user_stack_base)) { /* fork_idle() called us */ return 0; diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 
08359a6e058f..da83cc83e791 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -157,7 +157,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, */ p->thread.fs = get_fs().seg; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* kernel thread */ memset(frame, 0, sizeof(struct fork_frame)); frame->regs.sr = PS_S; diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index f99860771ff4..ee000ae17e39 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -59,7 +59,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, struct pt_regs *childregs = task_pt_regs(p); struct thread_info *ti = task_thread_info(p); - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* if we're creating a new kernel thread then just zeroing all * the registers. That's OK for a brand new thread.*/ memset(childregs, 0, sizeof(struct pt_regs)); diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 75ebd8d7bd5d..98ecaf6f3edb 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -135,7 +135,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, /* Put the stack after the struct pt_regs. 
*/ childksp = (unsigned long) childregs; p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* kernel thread */ unsigned long status = p->thread.cp0_status; memset(childregs, 0, sizeof(struct pt_regs)); diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c index e01ad5d17224..c1327e552ec6 100644 --- a/arch/nds32/kernel/process.c +++ b/arch/nds32/kernel/process.c @@ -156,7 +156,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { memset(childregs, 0, sizeof(struct pt_regs)); /* kernel thread fn */ p->thread.cpu_context.r6 = stack_start; diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 50b4eb19a6cc..c5f916ca6845 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -109,7 +109,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, struct switch_stack *childstack = ((struct switch_stack *)childregs) - 1; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { memset(childstack, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs)); diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 3c98728cce24..83fba4ee4453 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -167,7 +167,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, sp -= sizeof(struct pt_regs); kregs = (struct pt_regs *)sp; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { memset(kregs, 0, sizeof(struct pt_regs)); kregs->gpr[20] = usp; /* fn, kernel thread */ kregs->gpr[22] = arg; diff --git 
a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 9a8b2e60adcf..7868050ff426 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -114,7 +114,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, memset(&p->thread.s, 0, sizeof(p->thread.s)); /* p->thread holds context to be restored by __switch_to() */ - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); childregs->gp = gp_in_global; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 137a170f47d4..bd7da4049707 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -127,7 +127,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, frame->sf.gprs[9] = (unsigned long) frame; /* Store access registers to kernel stack of new process. */ - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* kernel thread */ memset(&frame->childregs, 0, sizeof(struct pt_regs)); frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 80a5d1c66a51..1aa508eb0823 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -114,7 +114,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, childregs = task_pt_regs(p); p->thread.sp = (unsigned long) childregs; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { memset(childregs, 0, sizeof(struct pt_regs)); p->thread.pc = (unsigned long) ret_from_kernel_thread; childregs->regs[4] = arg; diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index a02363735915..0f9c606e1e78 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -309,7 +309,7 @@ int copy_thread(unsigned long 
clone_flags, unsigned long sp, unsigned long arg, ti->ksp = (unsigned long) new_stack; p->thread.kregs = childregs; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { extern int nwindows; unsigned long psr; memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ); diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 6f8c7822fc06..7afd0a859a78 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -597,7 +597,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, sizeof(struct sparc_stackf)); t->fpsaved[0] = 0; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { memset(child_trap_frame, 0, child_stack_sz); __thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] = (current_pt_regs()->tstate + 1) & TSTATE_CWP; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 067fb85fbf11..e6c9b11b2033 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -157,7 +157,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, struct task_struct * p, unsigned long tls) { void (*handler)(void); - int kthread = current->flags & PF_KTHREAD; + int kthread = current->flags & (PF_KTHREAD | PF_IO_WORKER); int ret = 0; p->thread = (struct thread_struct) INIT_THREAD; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 383afcc1098b..4d46c52d9c05 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -162,7 +162,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, #endif /* Kernel thread ? 
*/ - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { memset(childregs, 0, sizeof(struct pt_regs)); kthread_frame_init(frame, sp, arg); return 0; diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 397a7de56377..9534ef515d74 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -217,7 +217,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, p->thread.sp = (unsigned long)childregs; - if (!(p->flags & PF_KTHREAD)) { + if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { struct pt_regs *regs = current_pt_regs(); unsigned long usp = usp_thread_fn ? usp_thread_fn : regs->areg[1]; From 1f4eb355466be778b32961e797fa009fa7a40ba0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 23 Feb 2021 11:57:20 -0700 Subject: [PATCH 1422/1477] UPSTREAM: arch: ensure parisc/powerpc handle PF_IO_WORKER in copy_thread() [ Upstream commit 0100e6bbdbb79404e56939313662b42737026574 ] In the arch addition of PF_IO_WORKER, I missed parisc and powerpc for some reason. Fix that up, ensuring they handle PF_IO_WORKER like they do PF_KTHREAD in copy_thread(). 
Reported-by: Bruno Goncalves Fixes: 4727dc20e042 ("arch: setup PF_IO_WORKER threads like PF_KTHREAD") Change-Id: I3d0289912eb9e4545fc0b680df6890b6b837ebdd Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit dd26e2cec74f88cb7910deec77897d04ade299bd) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/process.c | 2 +- arch/powerpc/kernel/process.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index a92a23d6acd9..5e4381280c97 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -200,7 +200,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, extern void * const ret_from_kernel_thread; extern void * const child_return; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* kernel thread */ memset(cregs, 0, sizeof(struct pt_regs)); if (!usp) /* idle thread */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c43cc26bde5d..cf375d67eacb 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1684,7 +1684,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, /* Copy registers */ sp -= sizeof(struct pt_regs); childregs = (struct pt_regs *) sp; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); childregs->gpr[1] = sp + sizeof(struct pt_regs); From bcb749b0b192fe2679975fa4b440f65fec0e3ccd Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 5 May 2021 13:03:10 +0200 Subject: [PATCH 1423/1477] UPSTREAM: x86/process: setup io_threads more like normal user space threads [ Upstream commit 50b7b6f29de3e18e9d6c09641256a0296361cfee ] As io_threads are fully set up USER threads it's clearer to separate the code path from the KTHREAD logic. 
The only remaining difference to user space threads is that io_threads never return to user space again. Instead they loop within the given worker function. The fact that they never return to user space means they don't have a user space thread stack. In order to indicate that to tools like gdb we reset the stack and instruction pointers to 0. This allows gdb to attach to user space processes using io-uring, which likely means that they have io_threads, without printing worrying messages like this: warning: Selected architecture i386:x86-64 is not compatible with reported target architecture i386 warning: Architecture rejected target-supplied description The output will be something like this: (gdb) info threads Id Target Id Frame * 1 LWP 4863 "io_uring-cp-for" syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38 2 LWP 4864 "iou-mgr-4863" 0x0000000000000000 in ?? () 3 LWP 4865 "iou-wrk-4863" 0x0000000000000000 in ?? () (gdb) thread 3 [Switching to thread 3 (LWP 4865)] #0 0x0000000000000000 in ?? () (gdb) bt #0 0x0000000000000000 in ??
() Backtrace stopped: Cannot access memory at address 0x0 Fixes: 4727dc20e042 ("arch: setup PF_IO_WORKER threads like PF_KTHREAD") Link: https://lore.kernel.org/io-uring/044d0bad-6888-a211-e1d3-159a4aeed52d@polymtl.ca/T/#m1bbf5727e3d4e839603f6ec7ed79c7eebfba6267 Change-Id: I83793e9a4fbc5f9024c9aeace0640043c81a93b0 Signed-off-by: Stefan Metzmacher cc: Linus Torvalds cc: Jens Axboe cc: Andy Lutomirski cc: linux-kernel@vger.kernel.org cc: io-uring@vger.kernel.org cc: x86@kernel.org Link: https://lore.kernel.org/r/20210505110310.237537-1-metze@samba.org Reviewed-by: Thomas Gleixner Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f0a5f0dc0131c6483908601f6e4907befb609c97) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4d46c52d9c05..5e17c3939dd1 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -162,7 +162,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, #endif /* Kernel thread ? */ - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(p->flags & PF_KTHREAD)) { memset(childregs, 0, sizeof(struct pt_regs)); kthread_frame_init(frame, sp, arg); return 0; @@ -178,6 +178,23 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, task_user_gs(p) = get_user_gs(current_pt_regs()); #endif + if (unlikely(p->flags & PF_IO_WORKER)) { + /* + * An IO thread is a user space thread, but it doesn't + * return to ret_after_fork(). + * + * In order to indicate that to tools like gdb, + * we reset the stack and instruction pointers. + * + * It does the same kernel frame setup to return to a kernel + * function that a kernel thread does. 
+ */ + childregs->sp = 0; + childregs->ip = 0; + kthread_frame_init(frame, sp, arg); + return 0; + } + /* Set a new TLS for the child thread? */ if (clone_flags & CLONE_SETTLS) ret = set_new_tls(p, tls); From 52f564e57bef2140e5ce088313655ee3a51341e6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Mar 2021 09:05:22 -0600 Subject: [PATCH 1424/1477] UPSTREAM: kernel: stop masking signals in create_io_thread() [ Upstream commit b16b3855d89fba640996fefdd3a113c0aa0e380d ] This is racy - move the blocking into when the task is created and we're marking it as PF_IO_WORKER anyway. The IO threads are now prepared to handle signals like SIGSTOP as well, so clear that from the mask to allow proper stopping of IO threads. Acked-by: "Eric W. Biederman" Reported-by: Oleg Nesterov Change-Id: I6317c88e0723c6c97555f8ceacfee3692372ac4c Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9ded44b69c711455dbbddf6ec39b77ac41e4eed7) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 11445f72c1c0..e0baadf90aed 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1995,8 +1995,14 @@ static __latent_entropy struct task_struct *copy_process( p = dup_task_struct(current, node); if (!p) goto fork_out; - if (args->io_thread) + if (args->io_thread) { + /* + * Mark us an IO worker, and block any signal that isn't + * fatal or STOP + */ p->flags |= PF_IO_WORKER; + siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP)); + } cpufreq_task_times_init(p); @@ -2494,14 +2500,8 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) .stack_size = (unsigned long)arg, .io_thread = 1, }; - struct task_struct *tsk; - tsk = copy_process(NULL, 0, node, &args); - if (!IS_ERR(tsk)) { - sigfillset(&tsk->blocked); - sigdelsetmask(&tsk->blocked, sigmask(SIGKILL)); - } - return tsk; + return copy_process(NULL, 0, 
node, &args); } /* From 86acb6a52929154c7b8b5eb9818fc9c9012daf2b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Mar 2021 08:57:10 -0600 Subject: [PATCH 1425/1477] UPSTREAM: kernel: don't call do_exit() for PF_IO_WORKER threads [ Upstream commit 10442994ba195efef6fdcc0c3699e4633cb5161b ] Right now we're never calling get_signal() from PF_IO_WORKER threads, but in preparation for doing so, don't handle a fatal signal for them. The workers have state they need to cleanup when exiting, so just return instead of calling do_exit() on their behalf. The threads themselves will detect a fatal signal and do proper shutdown. Change-Id: Iedc3fae8cb496d003852c87fdefacc1ad7601cc5 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 831cb78a2a5e86fe705ef4e3095c7cbc587c6a57) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index bf54af58427e..a8499e105c30 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2786,6 +2786,14 @@ relock: do_coredump(&ksig->info); } + /* + * PF_IO_WORKER threads will catch and exit on fatal signals + * themselves. They have cleanup that must be performed, so + * we cannot call do_exit() on their behalf. + */ + if (current->flags & PF_IO_WORKER) + goto out; + /* * Death signals, no core dump. */ @@ -2793,7 +2801,7 @@ relock: /* NOTREACHED */ } spin_unlock_irq(&sighand->siglock); - +out: ksig->sig = signr; if (!(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS)) From 518e02ed06c84a12b070090365285751f3442b00 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Apr 2021 19:53:29 -0600 Subject: [PATCH 1426/1477] UPSTREAM: task_work: add helper for more targeted task_work canceling [ Upstream commit c7aab1a7c52b82d9afd7e03c398eb03dc2aa0507 ] The only exported helper we have right now is task_work_cancel(), which cancels any task_work from a given task where func matches the queued work item. 
This is a bit too coarse for some use cases. Add a task_work_cancel_match() that allows to more specifically target individual work items outside of purely the callback function used. task_work_cancel() can be trivially implemented on top of that, hence do so. Reviewed-by: Oleg Nesterov Change-Id: Ia33480d209b26d433a3ca196972d6931aa4f8dde Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit ed3005032993da7a3fe2e6095436e0bc2e83d011) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- include/linux/task_work.h | 2 ++ kernel/task_work.c | 35 ++++++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 0d848a1e9e62..5b8a93f288bb 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -22,6 +22,8 @@ enum task_work_notify_mode { int task_work_add(struct task_struct *task, struct callback_head *twork, enum task_work_notify_mode mode); +struct callback_head *task_work_cancel_match(struct task_struct *task, + bool (*match)(struct callback_head *, void *data), void *data); struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t); void task_work_run(void); diff --git a/kernel/task_work.c b/kernel/task_work.c index 3d4852891fa8..1698fbe6f0e1 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -62,18 +62,17 @@ int task_work_add(struct task_struct *task, struct callback_head *work, } /** - * task_work_cancel - cancel a pending work added by task_work_add() + * task_work_cancel_match - cancel a pending work added by task_work_add() * @task: the task which should execute the work - * @func: identifies the work to remove - * - * Find the last queued pending work with ->func == @func and remove - * it from queue. + * @match: match function to call * * RETURNS: * The found work or NULL if not found. 
*/ struct callback_head * -task_work_cancel(struct task_struct *task, task_work_func_t func) +task_work_cancel_match(struct task_struct *task, + bool (*match)(struct callback_head *, void *data), + void *data) { struct callback_head **pprev = &task->task_works; struct callback_head *work; @@ -89,7 +88,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) */ raw_spin_lock_irqsave(&task->pi_lock, flags); while ((work = READ_ONCE(*pprev))) { - if (work->func != func) + if (!match(work, data)) pprev = &work->next; else if (cmpxchg(pprev, work, work->next) == work) break; @@ -99,6 +98,28 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) return work; } +static bool task_work_func_match(struct callback_head *cb, void *data) +{ + return cb->func == data; +} + +/** + * task_work_cancel - cancel a pending work added by task_work_add() + * @task: the task which should execute the work + * @func: identifies the work to remove + * + * Find the last queued pending work with ->func == @func and remove + * it from queue. + * + * RETURNS: + * The found work or NULL if not found. + */ +struct callback_head * +task_work_cancel(struct task_struct *task, task_work_func_t func) +{ + return task_work_cancel_match(task, task_work_func_match, func); +} + /** * task_work_run - execute the works added by task_work_add() * From 5e6347b58643f36c00336c0a477ff3aa08d1f27a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 22 Dec 2022 14:30:11 -0700 Subject: [PATCH 1427/1477] UPSTREAM: io_uring: import 5.15-stable io_uring No upstream commit exists. This imports the io_uring codebase from 5.15.85, wholesale. Changes from that code base: - Drop IOCB_ALLOC_CACHE, we don't have that in 5.10. - Drop MKDIRAT/SYMLINKAT/LINKAT. Would require further VFS backports, and we don't support these in 5.10 to begin with. - sock_from_file() old style calling convention. 
- Use compat_get_bitmap() only for CONFIG_COMPAT=y Change-Id: I7ce5226d6b39763ffc246fd6357cece9aafd4b59 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 788d0824269bef539fe31a785b1517882eafed93) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- fs/Makefile | 2 - fs/io-wq.c | 1242 ---- include/linux/io_uring.h | 48 +- include/linux/sched.h | 3 + include/linux/syscalls.h | 2 +- include/trace/events/io_uring.h | 121 +- include/uapi/linux/io_uring.h | 115 +- io_uring/Makefile | 6 + io_uring/io-wq.c | 1398 +++++ {fs => io_uring}/io-wq.h | 47 +- {fs => io_uring}/io_uring.c | 9481 +++++++++++++++++-------------- kernel/exit.c | 2 +- kernel/fork.c | 1 + kernel/sched/core.c | 2 +- 15 files changed, 6855 insertions(+), 5617 deletions(-) delete mode 100644 fs/io-wq.c create mode 100644 io_uring/Makefile create mode 100644 io_uring/io-wq.c rename {fs => io_uring}/io-wq.h (81%) rename {fs => io_uring}/io_uring.c (51%) diff --git a/Makefile b/Makefile index f691a755b49e..a0b2911bfcfe 100644 --- a/Makefile +++ b/Makefile @@ -1229,7 +1229,7 @@ endif $(Q)$(MAKE) $(hdr-inst)=$(hdr-prefix)arch/$(SRCARCH)/include/uapi ifeq ($(KBUILD_EXTMOD),) -core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ +core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ io_uring/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ diff --git a/fs/Makefile b/fs/Makefile index 359c63fefa1b..c7851875b668 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -34,8 +34,6 @@ obj-$(CONFIG_TIMERFD) += timerfd.o obj-$(CONFIG_EVENTFD) += eventfd.o obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_AIO) += aio.o -obj-$(CONFIG_IO_URING) += io_uring.o -obj-$(CONFIG_IO_WQ) += io-wq.o obj-$(CONFIG_FS_DAX) += dax.o obj-$(CONFIG_FS_ENCRYPTION) += crypto/ obj-$(CONFIG_FS_VERITY) += verity/ diff --git a/fs/io-wq.c b/fs/io-wq.c deleted file mode 100644 index 3d5fc76b92d0..000000000000 --- a/fs/io-wq.c +++ 
/dev/null @@ -1,1242 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Basic worker thread pool for io_uring - * - * Copyright (C) 2019 Jens Axboe - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../kernel/sched/sched.h" -#include "io-wq.h" - -#define WORKER_IDLE_TIMEOUT (5 * HZ) - -enum { - IO_WORKER_F_UP = 1, /* up and active */ - IO_WORKER_F_RUNNING = 2, /* account as running */ - IO_WORKER_F_FREE = 4, /* worker on free list */ - IO_WORKER_F_FIXED = 8, /* static idle worker */ - IO_WORKER_F_BOUND = 16, /* is doing bounded work */ -}; - -enum { - IO_WQ_BIT_EXIT = 0, /* wq exiting */ - IO_WQ_BIT_CANCEL = 1, /* cancel work on list */ - IO_WQ_BIT_ERROR = 2, /* error on setup */ -}; - -enum { - IO_WQE_FLAG_STALLED = 1, /* stalled on hash */ -}; - -/* - * One for each thread in a wqe pool - */ -struct io_worker { - refcount_t ref; - unsigned flags; - struct hlist_nulls_node nulls_node; - struct list_head all_list; - struct task_struct *task; - struct io_wqe *wqe; - - struct io_wq_work *cur_work; - spinlock_t lock; - - struct rcu_head rcu; - struct mm_struct *mm; -#ifdef CONFIG_BLK_CGROUP - struct cgroup_subsys_state *blkcg_css; -#endif - const struct cred *cur_creds; - const struct cred *saved_creds; - struct files_struct *restore_files; - struct nsproxy *restore_nsproxy; - struct fs_struct *restore_fs; -}; - -#if BITS_PER_LONG == 64 -#define IO_WQ_HASH_ORDER 6 -#else -#define IO_WQ_HASH_ORDER 5 -#endif - -#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER) - -struct io_wqe_acct { - unsigned nr_workers; - unsigned max_workers; - atomic_t nr_running; -}; - -enum { - IO_WQ_ACCT_BOUND, - IO_WQ_ACCT_UNBOUND, -}; - -/* - * Per-node worker thread pool - */ -struct io_wqe { - struct { - raw_spinlock_t lock; - struct io_wq_work_list work_list; - unsigned long hash_map; - unsigned flags; - } ____cacheline_aligned_in_smp; - - int node; - struct 
io_wqe_acct acct[2]; - - struct hlist_nulls_head free_list; - struct list_head all_list; - - struct io_wq *wq; - struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS]; -}; - -/* - * Per io_wq state - */ -struct io_wq { - struct io_wqe **wqes; - unsigned long state; - - free_work_fn *free_work; - io_wq_work_fn *do_work; - - struct task_struct *manager; - struct user_struct *user; - refcount_t refs; - struct completion done; - - struct hlist_node cpuhp_node; - - refcount_t use_refs; -}; - -static enum cpuhp_state io_wq_online; - -static bool io_worker_get(struct io_worker *worker) -{ - return refcount_inc_not_zero(&worker->ref); -} - -static void io_worker_release(struct io_worker *worker) -{ - if (refcount_dec_and_test(&worker->ref)) - wake_up_process(worker->task); -} - -/* - * Note: drops the wqe->lock if returning true! The caller must re-acquire - * the lock in that case. Some callers need to restart handling if this - * happens, so we can't just re-acquire the lock on behalf of the caller. - */ -static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) -{ - bool dropped_lock = false; - - if (worker->saved_creds) { - revert_creds(worker->saved_creds); - worker->cur_creds = worker->saved_creds = NULL; - } - - if (current->files != worker->restore_files) { - __acquire(&wqe->lock); - raw_spin_unlock_irq(&wqe->lock); - dropped_lock = true; - - task_lock(current); - current->files = worker->restore_files; - current->nsproxy = worker->restore_nsproxy; - task_unlock(current); - } - - if (current->fs != worker->restore_fs) - current->fs = worker->restore_fs; - - /* - * If we have an active mm, we need to drop the wq lock before unusing - * it. If we do, return true and let the caller retry the idle loop. 
- */ - if (worker->mm) { - if (!dropped_lock) { - __acquire(&wqe->lock); - raw_spin_unlock_irq(&wqe->lock); - dropped_lock = true; - } - __set_current_state(TASK_RUNNING); - kthread_unuse_mm(worker->mm); - mmput(worker->mm); - worker->mm = NULL; - } - -#ifdef CONFIG_BLK_CGROUP - if (worker->blkcg_css) { - kthread_associate_blkcg(NULL); - worker->blkcg_css = NULL; - } -#endif - if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY) - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; - return dropped_lock; -} - -static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe, - struct io_wq_work *work) -{ - if (work->flags & IO_WQ_WORK_UNBOUND) - return &wqe->acct[IO_WQ_ACCT_UNBOUND]; - - return &wqe->acct[IO_WQ_ACCT_BOUND]; -} - -static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe, - struct io_worker *worker) -{ - if (worker->flags & IO_WORKER_F_BOUND) - return &wqe->acct[IO_WQ_ACCT_BOUND]; - - return &wqe->acct[IO_WQ_ACCT_UNBOUND]; -} - -static void io_worker_exit(struct io_worker *worker) -{ - struct io_wqe *wqe = worker->wqe; - struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker); - - /* - * If we're not at zero, someone else is holding a brief reference - * to the worker. Wait for that to go away. 
- */ - set_current_state(TASK_INTERRUPTIBLE); - if (!refcount_dec_and_test(&worker->ref)) - schedule(); - __set_current_state(TASK_RUNNING); - - preempt_disable(); - current->flags &= ~PF_IO_WORKER; - if (worker->flags & IO_WORKER_F_RUNNING) - atomic_dec(&acct->nr_running); - if (!(worker->flags & IO_WORKER_F_BOUND)) - atomic_dec(&wqe->wq->user->processes); - worker->flags = 0; - preempt_enable(); - - raw_spin_lock_irq(&wqe->lock); - hlist_nulls_del_rcu(&worker->nulls_node); - list_del_rcu(&worker->all_list); - if (__io_worker_unuse(wqe, worker)) { - __release(&wqe->lock); - raw_spin_lock_irq(&wqe->lock); - } - acct->nr_workers--; - raw_spin_unlock_irq(&wqe->lock); - - kfree_rcu(worker, rcu); - if (refcount_dec_and_test(&wqe->wq->refs)) - complete(&wqe->wq->done); -} - -static inline bool io_wqe_run_queue(struct io_wqe *wqe) - __must_hold(wqe->lock) -{ - if (!wq_list_empty(&wqe->work_list) && - !(wqe->flags & IO_WQE_FLAG_STALLED)) - return true; - return false; -} - -/* - * Check head of free list for an available worker. If one isn't available, - * caller must wake up the wq manager to create one. - */ -static bool io_wqe_activate_free_worker(struct io_wqe *wqe) - __must_hold(RCU) -{ - struct hlist_nulls_node *n; - struct io_worker *worker; - - n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list)); - if (is_a_nulls(n)) - return false; - - worker = hlist_nulls_entry(n, struct io_worker, nulls_node); - if (io_worker_get(worker)) { - wake_up_process(worker->task); - io_worker_release(worker); - return true; - } - - return false; -} - -/* - * We need a worker. If we find a free one, we're good. If not, and we're - * below the max number of workers, wake up the manager to create one. - */ -static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) -{ - bool ret; - - /* - * Most likely an attempt to queue unbounded work on an io_wq that - * wasn't setup with any unbounded workers. 
- */ - if (unlikely(!acct->max_workers)) - pr_warn_once("io-wq is not configured for unbound workers"); - - rcu_read_lock(); - ret = io_wqe_activate_free_worker(wqe); - rcu_read_unlock(); - - if (!ret && acct->nr_workers < acct->max_workers) - wake_up_process(wqe->wq->manager); -} - -static void io_wqe_inc_running(struct io_wqe *wqe, struct io_worker *worker) -{ - struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker); - - atomic_inc(&acct->nr_running); -} - -static void io_wqe_dec_running(struct io_wqe *wqe, struct io_worker *worker) - __must_hold(wqe->lock) -{ - struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker); - - if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) - io_wqe_wake_worker(wqe, acct); -} - -static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker) -{ - allow_kernel_signal(SIGINT); - - current->flags |= PF_IO_WORKER; - - worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); - worker->restore_files = current->files; - worker->restore_nsproxy = current->nsproxy; - worker->restore_fs = current->fs; - io_wqe_inc_running(wqe, worker); -} - -/* - * Worker will start processing some work. Move it to the busy list, if - * it's currently on the freelist - */ -static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, - struct io_wq_work *work) - __must_hold(wqe->lock) -{ - bool worker_bound, work_bound; - - if (worker->flags & IO_WORKER_F_FREE) { - worker->flags &= ~IO_WORKER_F_FREE; - hlist_nulls_del_init_rcu(&worker->nulls_node); - } - - /* - * If worker is moving from bound to unbound (or vice versa), then - * ensure we update the running accounting. 
- */ - worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0; - work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0; - if (worker_bound != work_bound) { - io_wqe_dec_running(wqe, worker); - if (work_bound) { - worker->flags |= IO_WORKER_F_BOUND; - wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--; - wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++; - atomic_dec(&wqe->wq->user->processes); - } else { - worker->flags &= ~IO_WORKER_F_BOUND; - wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++; - wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--; - atomic_inc(&wqe->wq->user->processes); - } - io_wqe_inc_running(wqe, worker); - } -} - -/* - * No work, worker going to sleep. Move to freelist, and unuse mm if we - * have one attached. Dropping the mm may potentially sleep, so we drop - * the lock in that case and return success. Since the caller has to - * retry the loop in that case (we changed task state), we don't regrab - * the lock if we return success. - */ -static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) - __must_hold(wqe->lock) -{ - if (!(worker->flags & IO_WORKER_F_FREE)) { - worker->flags |= IO_WORKER_F_FREE; - hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); - } - - return __io_worker_unuse(wqe, worker); -} - -static inline unsigned int io_get_work_hash(struct io_wq_work *work) -{ - return work->flags >> IO_WQ_HASH_SHIFT; -} - -static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) - __must_hold(wqe->lock) -{ - struct io_wq_work_node *node, *prev; - struct io_wq_work *work, *tail; - unsigned int hash; - - wq_list_for_each(node, prev, &wqe->work_list) { - work = container_of(node, struct io_wq_work, list); - - /* not hashed, can run anytime */ - if (!io_wq_is_hashed(work)) { - wq_list_del(&wqe->work_list, node, prev); - return work; - } - - /* hashed, can run if not already running */ - hash = io_get_work_hash(work); - if (!(wqe->hash_map & BIT(hash))) { - wqe->hash_map |= BIT(hash); - /* all items with this hash lie in [work, tail] */ - 
tail = wqe->hash_tail[hash]; - wqe->hash_tail[hash] = NULL; - wq_list_cut(&wqe->work_list, &tail->list, prev); - return work; - } - } - - return NULL; -} - -static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) -{ - if (worker->mm) { - kthread_unuse_mm(worker->mm); - mmput(worker->mm); - worker->mm = NULL; - } - - if (mmget_not_zero(work->identity->mm)) { - kthread_use_mm(work->identity->mm); - worker->mm = work->identity->mm; - return; - } - - /* failed grabbing mm, ensure work gets cancelled */ - work->flags |= IO_WQ_WORK_CANCEL; -} - -static inline void io_wq_switch_blkcg(struct io_worker *worker, - struct io_wq_work *work) -{ -#ifdef CONFIG_BLK_CGROUP - if (!(work->flags & IO_WQ_WORK_BLKCG)) - return; - if (work->identity->blkcg_css != worker->blkcg_css) { - kthread_associate_blkcg(work->identity->blkcg_css); - worker->blkcg_css = work->identity->blkcg_css; - } -#endif -} - -static void io_wq_switch_creds(struct io_worker *worker, - struct io_wq_work *work) -{ - const struct cred *old_creds = override_creds(work->identity->creds); - - worker->cur_creds = work->identity->creds; - if (worker->saved_creds) - put_cred(old_creds); /* creds set by previous switch */ - else - worker->saved_creds = old_creds; -} - -static void io_impersonate_work(struct io_worker *worker, - struct io_wq_work *work) -{ - if ((work->flags & IO_WQ_WORK_FILES) && - current->files != work->identity->files) { - task_lock(current); - current->files = work->identity->files; - current->nsproxy = work->identity->nsproxy; - task_unlock(current); - if (!work->identity->files) { - /* failed grabbing files, ensure work gets cancelled */ - work->flags |= IO_WQ_WORK_CANCEL; - } - } - if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs) - current->fs = work->identity->fs; - if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm) - io_wq_switch_mm(worker, work); - if ((work->flags & IO_WQ_WORK_CREDS) && - worker->cur_creds != 
work->identity->creds) - io_wq_switch_creds(worker, work); - if (work->flags & IO_WQ_WORK_FSIZE) - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize; - else if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY) - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; - io_wq_switch_blkcg(worker, work); -#ifdef CONFIG_AUDIT - current->loginuid = work->identity->loginuid; - current->sessionid = work->identity->sessionid; -#endif -} - -static void io_assign_current_work(struct io_worker *worker, - struct io_wq_work *work) -{ - if (work) { - /* flush pending signals before assigning new work */ - if (signal_pending(current)) - flush_signals(current); - cond_resched(); - } - -#ifdef CONFIG_AUDIT - current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET); - current->sessionid = AUDIT_SID_UNSET; -#endif - - spin_lock_irq(&worker->lock); - worker->cur_work = work; - spin_unlock_irq(&worker->lock); -} - -static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work); - -static void io_worker_handle_work(struct io_worker *worker) - __releases(wqe->lock) -{ - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; - - do { - struct io_wq_work *work; -get_next: - /* - * If we got some work, mark us as busy. If we didn't, but - * the list isn't empty, it means we stalled on hashed work. - * Mark us stalled so we don't keep looking for work when we - * can't make progress, any work completion or insertion will - * clear the stalled flag. 
- */ - work = io_get_next_work(wqe); - if (work) - __io_worker_busy(wqe, worker, work); - else if (!wq_list_empty(&wqe->work_list)) - wqe->flags |= IO_WQE_FLAG_STALLED; - - raw_spin_unlock_irq(&wqe->lock); - if (!work) - break; - io_assign_current_work(worker, work); - - /* handle a whole dependent link */ - do { - struct io_wq_work *old_work, *next_hashed, *linked; - unsigned int hash = io_get_work_hash(work); - - next_hashed = wq_next_work(work); - io_impersonate_work(worker, work); - /* - * OK to set IO_WQ_WORK_CANCEL even for uncancellable - * work, the worker function will do the right thing. - */ - if (test_bit(IO_WQ_BIT_CANCEL, &wq->state)) - work->flags |= IO_WQ_WORK_CANCEL; - - old_work = work; - linked = wq->do_work(work); - - work = next_hashed; - if (!work && linked && !io_wq_is_hashed(linked)) { - work = linked; - linked = NULL; - } - io_assign_current_work(worker, work); - wq->free_work(old_work); - - if (linked) - io_wqe_enqueue(wqe, linked); - - if (hash != -1U && !next_hashed) { - raw_spin_lock_irq(&wqe->lock); - wqe->hash_map &= ~BIT_ULL(hash); - wqe->flags &= ~IO_WQE_FLAG_STALLED; - /* skip unnecessary unlock-lock wqe->lock */ - if (!work) - goto get_next; - raw_spin_unlock_irq(&wqe->lock); - } - } while (work); - - raw_spin_lock_irq(&wqe->lock); - } while (1); -} - -static int io_wqe_worker(void *data) -{ - struct io_worker *worker = data; - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; - - io_worker_start(wqe, worker); - - while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { - set_current_state(TASK_INTERRUPTIBLE); -loop: - raw_spin_lock_irq(&wqe->lock); - if (io_wqe_run_queue(wqe)) { - __set_current_state(TASK_RUNNING); - io_worker_handle_work(worker); - goto loop; - } - /* drops the lock on success, retry */ - if (__io_worker_idle(wqe, worker)) { - __release(&wqe->lock); - goto loop; - } - raw_spin_unlock_irq(&wqe->lock); - if (signal_pending(current)) - flush_signals(current); - if (schedule_timeout(WORKER_IDLE_TIMEOUT)) - 
continue; - /* timed out, exit unless we're the fixed worker */ - if (test_bit(IO_WQ_BIT_EXIT, &wq->state) || - !(worker->flags & IO_WORKER_F_FIXED)) - break; - } - - if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) { - raw_spin_lock_irq(&wqe->lock); - if (!wq_list_empty(&wqe->work_list)) - io_worker_handle_work(worker); - else - raw_spin_unlock_irq(&wqe->lock); - } - - io_worker_exit(worker); - return 0; -} - -/* - * Called when a worker is scheduled in. Mark us as currently running. - */ -void io_wq_worker_running(struct task_struct *tsk) -{ - struct io_worker *worker = kthread_data(tsk); - struct io_wqe *wqe = worker->wqe; - - if (!(worker->flags & IO_WORKER_F_UP)) - return; - if (worker->flags & IO_WORKER_F_RUNNING) - return; - worker->flags |= IO_WORKER_F_RUNNING; - io_wqe_inc_running(wqe, worker); -} - -/* - * Called when worker is going to sleep. If there are no workers currently - * running and we have work pending, wake up a free one or have the manager - * set one up. - */ -void io_wq_worker_sleeping(struct task_struct *tsk) -{ - struct io_worker *worker = kthread_data(tsk); - struct io_wqe *wqe = worker->wqe; - - if (!(worker->flags & IO_WORKER_F_UP)) - return; - if (!(worker->flags & IO_WORKER_F_RUNNING)) - return; - - worker->flags &= ~IO_WORKER_F_RUNNING; - - raw_spin_lock_irq(&wqe->lock); - io_wqe_dec_running(wqe, worker); - raw_spin_unlock_irq(&wqe->lock); -} - -static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) -{ - struct io_wqe_acct *acct = &wqe->acct[index]; - struct io_worker *worker; - - worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node); - if (!worker) - return false; - - refcount_set(&worker->ref, 1); - worker->nulls_node.pprev = NULL; - worker->wqe = wqe; - spin_lock_init(&worker->lock); - - worker->task = kthread_create_on_node(io_wqe_worker, worker, wqe->node, - "io_wqe_worker-%d/%d", index, wqe->node); - if (IS_ERR(worker->task)) { - kfree(worker); - return false; - } - kthread_bind_mask(worker->task, 
cpumask_of_node(wqe->node)); - - raw_spin_lock_irq(&wqe->lock); - hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); - list_add_tail_rcu(&worker->all_list, &wqe->all_list); - worker->flags |= IO_WORKER_F_FREE; - if (index == IO_WQ_ACCT_BOUND) - worker->flags |= IO_WORKER_F_BOUND; - if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND)) - worker->flags |= IO_WORKER_F_FIXED; - acct->nr_workers++; - raw_spin_unlock_irq(&wqe->lock); - - if (index == IO_WQ_ACCT_UNBOUND) - atomic_inc(&wq->user->processes); - - refcount_inc(&wq->refs); - wake_up_process(worker->task); - return true; -} - -static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index) - __must_hold(wqe->lock) -{ - struct io_wqe_acct *acct = &wqe->acct[index]; - - /* if we have available workers or no work, no need */ - if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe)) - return false; - return acct->nr_workers < acct->max_workers; -} - -static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data) -{ - send_sig(SIGINT, worker->task, 1); - return false; -} - -/* - * Iterate the passed in list and call the specific function for each - * worker that isn't exiting - */ -static bool io_wq_for_each_worker(struct io_wqe *wqe, - bool (*func)(struct io_worker *, void *), - void *data) -{ - struct io_worker *worker; - bool ret = false; - - list_for_each_entry_rcu(worker, &wqe->all_list, all_list) { - if (io_worker_get(worker)) { - /* no task if node is/was offline */ - if (worker->task) - ret = func(worker, data); - io_worker_release(worker); - if (ret) - break; - } - } - - return ret; -} - -static bool io_wq_worker_wake(struct io_worker *worker, void *data) -{ - wake_up_process(worker->task); - return false; -} - -/* - * Manager thread. Tasked with creating new workers, if we need them. 
- */ -static int io_wq_manager(void *data) -{ - struct io_wq *wq = data; - int node; - - /* create fixed workers */ - refcount_set(&wq->refs, 1); - for_each_node(node) { - if (!node_online(node)) - continue; - if (create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND)) - continue; - set_bit(IO_WQ_BIT_ERROR, &wq->state); - set_bit(IO_WQ_BIT_EXIT, &wq->state); - goto out; - } - - complete(&wq->done); - - while (!kthread_should_stop()) { - if (current->task_works) - task_work_run(); - - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - bool fork_worker[2] = { false, false }; - - if (!node_online(node)) - continue; - - raw_spin_lock_irq(&wqe->lock); - if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND)) - fork_worker[IO_WQ_ACCT_BOUND] = true; - if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND)) - fork_worker[IO_WQ_ACCT_UNBOUND] = true; - raw_spin_unlock_irq(&wqe->lock); - if (fork_worker[IO_WQ_ACCT_BOUND]) - create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND); - if (fork_worker[IO_WQ_ACCT_UNBOUND]) - create_io_worker(wq, wqe, IO_WQ_ACCT_UNBOUND); - } - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); - } - - if (current->task_works) - task_work_run(); - -out: - if (refcount_dec_and_test(&wq->refs)) { - complete(&wq->done); - return 0; - } - /* if ERROR is set and we get here, we have workers to wake */ - if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) { - rcu_read_lock(); - for_each_node(node) - io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL); - rcu_read_unlock(); - } - return 0; -} - -static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct, - struct io_wq_work *work) -{ - bool free_worker; - - if (!(work->flags & IO_WQ_WORK_UNBOUND)) - return true; - if (atomic_read(&acct->nr_running)) - return true; - - rcu_read_lock(); - free_worker = !hlist_nulls_empty(&wqe->free_list); - rcu_read_unlock(); - if (free_worker) - return true; - - if (atomic_read(&wqe->wq->user->processes) >= acct->max_workers && - !(capable(CAP_SYS_RESOURCE) || 
capable(CAP_SYS_ADMIN))) - return false; - - return true; -} - -static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) -{ - struct io_wq *wq = wqe->wq; - - do { - struct io_wq_work *old_work = work; - - work->flags |= IO_WQ_WORK_CANCEL; - work = wq->do_work(work); - wq->free_work(old_work); - } while (work); -} - -static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work) -{ - unsigned int hash; - struct io_wq_work *tail; - - if (!io_wq_is_hashed(work)) { -append: - wq_list_add_tail(&work->list, &wqe->work_list); - return; - } - - hash = io_get_work_hash(work); - tail = wqe->hash_tail[hash]; - wqe->hash_tail[hash] = work; - if (!tail) - goto append; - - wq_list_add_after(&work->list, &tail->list, &wqe->work_list); -} - -static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) -{ - struct io_wqe_acct *acct = io_work_get_acct(wqe, work); - bool do_wake; - unsigned long flags; - - /* - * Do early check to see if we need a new unbound worker, and if we do, - * if we're allowed to do so. This isn't 100% accurate as there's a - * gap between this check and incrementing the value, but that's OK. - * It's close enough to not be an issue, fork() has the same delay. - */ - if (unlikely(!io_wq_can_queue(wqe, acct, work))) { - io_run_cancel(work, wqe); - return; - } - - raw_spin_lock_irqsave(&wqe->lock, flags); - io_wqe_insert_work(wqe, work); - wqe->flags &= ~IO_WQE_FLAG_STALLED; - do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) || - !atomic_read(&acct->nr_running); - raw_spin_unlock_irqrestore(&wqe->lock, flags); - - if (do_wake) - io_wqe_wake_worker(wqe, acct); -} - -void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) -{ - struct io_wqe *wqe = wq->wqes[numa_node_id()]; - - io_wqe_enqueue(wqe, work); -} - -/* - * Work items that hash to the same value will not be done in parallel. - * Used to limit concurrent writes, generally hashed by inode. 
- */ -void io_wq_hash_work(struct io_wq_work *work, void *val) -{ - unsigned int bit; - - bit = hash_ptr(val, IO_WQ_HASH_ORDER); - work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); -} - -void io_wq_cancel_all(struct io_wq *wq) -{ - int node; - - set_bit(IO_WQ_BIT_CANCEL, &wq->state); - - rcu_read_lock(); - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - - io_wq_for_each_worker(wqe, io_wqe_worker_send_sig, NULL); - } - rcu_read_unlock(); -} - -struct io_cb_cancel_data { - work_cancel_fn *fn; - void *data; - int nr_running; - int nr_pending; - bool cancel_all; -}; - -static bool io_wq_worker_cancel(struct io_worker *worker, void *data) -{ - struct io_cb_cancel_data *match = data; - unsigned long flags; - - /* - * Hold the lock to avoid ->cur_work going out of scope, caller - * may dereference the passed in work. - */ - spin_lock_irqsave(&worker->lock, flags); - if (worker->cur_work && - !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) && - match->fn(worker->cur_work, match->data)) { - send_sig(SIGINT, worker->task, 1); - match->nr_running++; - } - spin_unlock_irqrestore(&worker->lock, flags); - - return match->nr_running && !match->cancel_all; -} - -static inline void io_wqe_remove_pending(struct io_wqe *wqe, - struct io_wq_work *work, - struct io_wq_work_node *prev) -{ - unsigned int hash = io_get_work_hash(work); - struct io_wq_work *prev_work = NULL; - - if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) { - if (prev) - prev_work = container_of(prev, struct io_wq_work, list); - if (prev_work && io_get_work_hash(prev_work) == hash) - wqe->hash_tail[hash] = prev_work; - else - wqe->hash_tail[hash] = NULL; - } - wq_list_del(&wqe->work_list, &work->list, prev); -} - -static void io_wqe_cancel_pending_work(struct io_wqe *wqe, - struct io_cb_cancel_data *match) -{ - struct io_wq_work_node *node, *prev; - struct io_wq_work *work; - unsigned long flags; - -retry: - raw_spin_lock_irqsave(&wqe->lock, flags); - wq_list_for_each(node, 
prev, &wqe->work_list) { - work = container_of(node, struct io_wq_work, list); - if (!match->fn(work, match->data)) - continue; - io_wqe_remove_pending(wqe, work, prev); - raw_spin_unlock_irqrestore(&wqe->lock, flags); - io_run_cancel(work, wqe); - match->nr_pending++; - if (!match->cancel_all) - return; - - /* not safe to continue after unlock */ - goto retry; - } - raw_spin_unlock_irqrestore(&wqe->lock, flags); -} - -static void io_wqe_cancel_running_work(struct io_wqe *wqe, - struct io_cb_cancel_data *match) -{ - rcu_read_lock(); - io_wq_for_each_worker(wqe, io_wq_worker_cancel, match); - rcu_read_unlock(); -} - -enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, - void *data, bool cancel_all) -{ - struct io_cb_cancel_data match = { - .fn = cancel, - .data = data, - .cancel_all = cancel_all, - }; - int node; - - /* - * First check pending list, if we're lucky we can just remove it - * from there. CANCEL_OK means that the work is returned as-new, - * no completion will be posted for it. - */ - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - - io_wqe_cancel_pending_work(wqe, &match); - if (match.nr_pending && !match.cancel_all) - return IO_WQ_CANCEL_OK; - } - - /* - * Now check if a free (going busy) or busy worker has the work - * currently running. If we find it there, we'll return CANCEL_RUNNING - * as an indication that we attempt to signal cancellation. The - * completion will run normally in this case. 
- */ - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - - io_wqe_cancel_running_work(wqe, &match); - if (match.nr_running && !match.cancel_all) - return IO_WQ_CANCEL_RUNNING; - } - - if (match.nr_running) - return IO_WQ_CANCEL_RUNNING; - if (match.nr_pending) - return IO_WQ_CANCEL_OK; - return IO_WQ_CANCEL_NOTFOUND; -} - -struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) -{ - int ret = -ENOMEM, node; - struct io_wq *wq; - - if (WARN_ON_ONCE(!data->free_work || !data->do_work)) - return ERR_PTR(-EINVAL); - if (WARN_ON_ONCE(!bounded)) - return ERR_PTR(-EINVAL); - - wq = kzalloc(sizeof(*wq), GFP_KERNEL); - if (!wq) - return ERR_PTR(-ENOMEM); - - wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL); - if (!wq->wqes) - goto err_wq; - - ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); - if (ret) - goto err_wqes; - - wq->free_work = data->free_work; - wq->do_work = data->do_work; - - /* caller must already hold a reference to this */ - wq->user = data->user; - - ret = -ENOMEM; - for_each_node(node) { - struct io_wqe *wqe; - int alloc_node = node; - - if (!node_online(alloc_node)) - alloc_node = NUMA_NO_NODE; - wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node); - if (!wqe) - goto err; - wq->wqes[node] = wqe; - wqe->node = alloc_node; - wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; - atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0); - if (wq->user) { - wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers = - task_rlimit(current, RLIMIT_NPROC); - } - atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0); - wqe->wq = wq; - raw_spin_lock_init(&wqe->lock); - INIT_WQ_LIST(&wqe->work_list); - INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); - INIT_LIST_HEAD(&wqe->all_list); - } - - init_completion(&wq->done); - - wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager"); - if (!IS_ERR(wq->manager)) { - wake_up_process(wq->manager); - wait_for_completion(&wq->done); - if 
(test_bit(IO_WQ_BIT_ERROR, &wq->state)) { - ret = -ENOMEM; - goto err; - } - refcount_set(&wq->use_refs, 1); - reinit_completion(&wq->done); - return wq; - } - - ret = PTR_ERR(wq->manager); - complete(&wq->done); -err: - cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); - for_each_node(node) - kfree(wq->wqes[node]); -err_wqes: - kfree(wq->wqes); -err_wq: - kfree(wq); - return ERR_PTR(ret); -} - -bool io_wq_get(struct io_wq *wq, struct io_wq_data *data) -{ - if (data->free_work != wq->free_work || data->do_work != wq->do_work) - return false; - - return refcount_inc_not_zero(&wq->use_refs); -} - -static void __io_wq_destroy(struct io_wq *wq) -{ - int node; - - cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); - - set_bit(IO_WQ_BIT_EXIT, &wq->state); - if (wq->manager) - kthread_stop(wq->manager); - - rcu_read_lock(); - for_each_node(node) - io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL); - rcu_read_unlock(); - - wait_for_completion(&wq->done); - - for_each_node(node) - kfree(wq->wqes[node]); - kfree(wq->wqes); - kfree(wq); -} - -void io_wq_destroy(struct io_wq *wq) -{ - if (refcount_dec_and_test(&wq->use_refs)) - __io_wq_destroy(wq); -} - -struct task_struct *io_wq_get_task(struct io_wq *wq) -{ - return wq->manager; -} - -static bool io_wq_worker_affinity(struct io_worker *worker, void *data) -{ - struct task_struct *task = worker->task; - struct rq_flags rf; - struct rq *rq; - - rq = task_rq_lock(task, &rf); - do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node)); - task->flags |= PF_NO_SETAFFINITY; - task_rq_unlock(rq, task, &rf); - return false; -} - -static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) -{ - struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); - int i; - - rcu_read_lock(); - for_each_node(i) - io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL); - rcu_read_unlock(); - return 0; -} - -static __init int io_wq_init(void) -{ - int ret; - - ret = 
cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online", - io_wq_cpu_online, NULL); - if (ret < 0) - return ret; - io_wq_online = ret; - return 0; -} -subsys_initcall(io_wq_init); diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 35b2d845704d..649a4d7c241b 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -5,50 +5,20 @@ #include #include -struct io_identity { - struct files_struct *files; - struct mm_struct *mm; -#ifdef CONFIG_BLK_CGROUP - struct cgroup_subsys_state *blkcg_css; -#endif - const struct cred *creds; - struct nsproxy *nsproxy; - struct fs_struct *fs; - unsigned long fsize; -#ifdef CONFIG_AUDIT - kuid_t loginuid; - unsigned int sessionid; -#endif - refcount_t count; -}; - -struct io_uring_task { - /* submission side */ - struct xarray xa; - struct wait_queue_head wait; - struct file *last; - struct percpu_counter inflight; - struct io_identity __identity; - struct io_identity *identity; - atomic_t in_idle; - bool sqpoll; -}; - #if defined(CONFIG_IO_URING) struct sock *io_uring_get_socket(struct file *file); -void __io_uring_task_cancel(void); -void __io_uring_files_cancel(struct files_struct *files); +void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); +static inline void io_uring_files_cancel(void) +{ + if (current->io_uring) + __io_uring_cancel(false); +} static inline void io_uring_task_cancel(void) { - if (current->io_uring && !xa_empty(¤t->io_uring->xa)) - __io_uring_task_cancel(); -} -static inline void io_uring_files_cancel(struct files_struct *files) -{ - if (current->io_uring && !xa_empty(¤t->io_uring->xa)) - __io_uring_files_cancel(files); + if (current->io_uring) + __io_uring_cancel(true); } static inline void io_uring_free(struct task_struct *tsk) { @@ -63,7 +33,7 @@ static inline struct sock *io_uring_get_socket(struct file *file) static inline void io_uring_task_cancel(void) { } -static inline void io_uring_files_cancel(struct files_struct *files) +static 
inline void io_uring_files_cancel(void) { } static inline void io_uring_free(struct task_struct *tsk) diff --git a/include/linux/sched.h b/include/linux/sched.h index e947c66d5e65..b8b8f18bbbbb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -901,6 +901,9 @@ struct task_struct { /* CLONE_CHILD_CLEARTID: */ int __user *clear_child_tid; + /* PF_IO_WORKER */ + void *pf_io_worker; + u64 utime; u64 stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 112ff24ea927..1c170be3f746 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -341,7 +341,7 @@ asmlinkage long sys_io_uring_setup(u32 entries, struct io_uring_params __user *p); asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit, u32 min_complete, u32 flags, - const sigset_t __user *sig, size_t sigsz); + const void __user *argp, size_t argsz); asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op, void __user *arg, unsigned int nr_args); diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 9f0d3b7d56b0..0dd30de00e5b 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -12,11 +12,11 @@ struct io_wq_work; /** * io_uring_create - called after a new io_uring context was prepared * - * @fd: corresponding file descriptor - * @ctx: pointer to a ring context structure + * @fd: corresponding file descriptor + * @ctx: pointer to a ring context structure * @sq_entries: actual SQ size * @cq_entries: actual CQ size - * @flags: SQ ring flags, provided to io_uring_setup(2) + * @flags: SQ ring flags, provided to io_uring_setup(2) * * Allows to trace io_uring creation and provide pointer to a context, that can * be used later to find correlated events. 
@@ -49,15 +49,15 @@ TRACE_EVENT(io_uring_create, ); /** - * io_uring_register - called after a buffer/file/eventfd was succesfully + * io_uring_register - called after a buffer/file/eventfd was successfully * registered for a ring * - * @ctx: pointer to a ring context structure - * @opcode: describes which operation to perform + * @ctx: pointer to a ring context structure + * @opcode: describes which operation to perform * @nr_user_files: number of registered files * @nr_user_bufs: number of registered buffers * @cq_ev_fd: whether eventfs registered or not - * @ret: return code + * @ret: return code * * Allows to trace fixed files/buffers/eventfds, that could be registered to * avoid an overhead of getting references to them for every operation. This @@ -142,16 +142,16 @@ TRACE_EVENT(io_uring_queue_async_work, TP_ARGS(ctx, rw, req, work, flags), TP_STRUCT__entry ( - __field( void *, ctx ) - __field( int, rw ) - __field( void *, req ) + __field( void *, ctx ) + __field( int, rw ) + __field( void *, req ) __field( struct io_wq_work *, work ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->ctx = ctx; - __entry->rw = rw; + __entry->rw = rw; __entry->req = req; __entry->work = work; __entry->flags = flags; @@ -196,10 +196,10 @@ TRACE_EVENT(io_uring_defer, /** * io_uring_link - called before the io_uring request added into link_list of - * another request + * another request * - * @ctx: pointer to a ring context structure - * @req: pointer to a linked request + * @ctx: pointer to a ring context structure + * @req: pointer to a linked request * @target_req: pointer to a previous request, that would contain @req * * Allows to track linked requests, to understand dependencies between requests @@ -212,8 +212,8 @@ TRACE_EVENT(io_uring_link, TP_ARGS(ctx, req, target_req), TP_STRUCT__entry ( - __field( void *, ctx ) - __field( void *, req ) + __field( void *, ctx ) + __field( void *, req ) __field( void *, target_req ) ), @@ -244,7 +244,7 @@ 
TRACE_EVENT(io_uring_cqring_wait, TP_ARGS(ctx, min_events), TP_STRUCT__entry ( - __field( void *, ctx ) + __field( void *, ctx ) __field( int, min_events ) ), @@ -272,7 +272,7 @@ TRACE_EVENT(io_uring_fail_link, TP_ARGS(req, link), TP_STRUCT__entry ( - __field( void *, req ) + __field( void *, req ) __field( void *, link ) ), @@ -290,38 +290,42 @@ TRACE_EVENT(io_uring_fail_link, * @ctx: pointer to a ring context structure * @user_data: user data associated with the request * @res: result of the request + * @cflags: completion flags * */ TRACE_EVENT(io_uring_complete, - TP_PROTO(void *ctx, u64 user_data, long res), + TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags), - TP_ARGS(ctx, user_data, res), + TP_ARGS(ctx, user_data, res, cflags), TP_STRUCT__entry ( __field( void *, ctx ) __field( u64, user_data ) - __field( long, res ) + __field( int, res ) + __field( unsigned, cflags ) ), TP_fast_assign( __entry->ctx = ctx; __entry->user_data = user_data; __entry->res = res; + __entry->cflags = cflags; ), - TP_printk("ring %p, user_data 0x%llx, result %ld", + TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x", __entry->ctx, (unsigned long long)__entry->user_data, - __entry->res) + __entry->res, __entry->cflags) ); - /** * io_uring_submit_sqe - called before submitting one SQE * * @ctx: pointer to a ring context structure + * @req: pointer to a submitted request * @opcode: opcode of request * @user_data: user data associated with the request + * @flags request flags * @force_nonblock: whether a context blocking or not * @sq_thread: true if sq_thread has submitted this SQE * @@ -330,41 +334,60 @@ TRACE_EVENT(io_uring_complete, */ TRACE_EVENT(io_uring_submit_sqe, - TP_PROTO(void *ctx, u8 opcode, u64 user_data, bool force_nonblock, - bool sq_thread), + TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, u32 flags, + bool force_nonblock, bool sq_thread), - TP_ARGS(ctx, opcode, user_data, force_nonblock, sq_thread), + TP_ARGS(ctx, req, opcode, user_data, 
flags, force_nonblock, sq_thread), TP_STRUCT__entry ( __field( void *, ctx ) + __field( void *, req ) __field( u8, opcode ) __field( u64, user_data ) + __field( u32, flags ) __field( bool, force_nonblock ) __field( bool, sq_thread ) ), TP_fast_assign( __entry->ctx = ctx; + __entry->req = req; __entry->opcode = opcode; __entry->user_data = user_data; + __entry->flags = flags; __entry->force_nonblock = force_nonblock; __entry->sq_thread = sq_thread; ), - TP_printk("ring %p, op %d, data 0x%llx, non block %d, sq_thread %d", - __entry->ctx, __entry->opcode, - (unsigned long long) __entry->user_data, - __entry->force_nonblock, __entry->sq_thread) + TP_printk("ring %p, req %p, op %d, data 0x%llx, flags %u, " + "non block %d, sq_thread %d", __entry->ctx, __entry->req, + __entry->opcode, (unsigned long long)__entry->user_data, + __entry->flags, __entry->force_nonblock, __entry->sq_thread) ); +/* + * io_uring_poll_arm - called after arming a poll wait if successful + * + * @ctx: pointer to a ring context structure + * @req: pointer to the armed request + * @opcode: opcode of request + * @user_data: user data associated with the request + * @mask: request poll events mask + * @events: registered events of interest + * + * Allows to track which fds are waiting for and what are the events of + * interest. 
+ */ TRACE_EVENT(io_uring_poll_arm, - TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events), + TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, + int mask, int events), - TP_ARGS(ctx, opcode, user_data, mask, events), + TP_ARGS(ctx, req, opcode, user_data, mask, events), TP_STRUCT__entry ( __field( void *, ctx ) + __field( void *, req ) __field( u8, opcode ) __field( u64, user_data ) __field( int, mask ) @@ -373,16 +396,17 @@ TRACE_EVENT(io_uring_poll_arm, TP_fast_assign( __entry->ctx = ctx; + __entry->req = req; __entry->opcode = opcode; __entry->user_data = user_data; __entry->mask = mask; __entry->events = events; ), - TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x", - __entry->ctx, __entry->opcode, - (unsigned long long) __entry->user_data, - __entry->mask, __entry->events) + TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x", + __entry->ctx, __entry->req, __entry->opcode, + (unsigned long long) __entry->user_data, + __entry->mask, __entry->events) ); TRACE_EVENT(io_uring_poll_wake, @@ -437,27 +461,40 @@ TRACE_EVENT(io_uring_task_add, __entry->mask) ); +/* + * io_uring_task_run - called when task_work_run() executes the poll events + * notification callbacks + * + * @ctx: pointer to a ring context structure + * @req: pointer to the armed request + * @opcode: opcode of request + * @user_data: user data associated with the request + * + * Allows to track when notified poll events are processed + */ TRACE_EVENT(io_uring_task_run, - TP_PROTO(void *ctx, u8 opcode, u64 user_data), + TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data), - TP_ARGS(ctx, opcode, user_data), + TP_ARGS(ctx, req, opcode, user_data), TP_STRUCT__entry ( __field( void *, ctx ) + __field( void *, req ) __field( u8, opcode ) __field( u64, user_data ) ), TP_fast_assign( __entry->ctx = ctx; + __entry->req = req; __entry->opcode = opcode; __entry->user_data = user_data; ), - TP_printk("ring %p, op %d, data 0x%llx", - __entry->ctx, 
__entry->opcode, - (unsigned long long) __entry->user_data) + TP_printk("ring %p, req %p, op %d, data 0x%llx", + __entry->ctx, __entry->req, __entry->opcode, + (unsigned long long) __entry->user_data) ); #endif /* _TRACE_IO_URING_H */ diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 98d8e06dea22..6481db937002 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -42,23 +42,25 @@ struct io_uring_sqe { __u32 statx_flags; __u32 fadvise_advice; __u32 splice_flags; + __u32 rename_flags; + __u32 unlink_flags; + __u32 hardlink_flags; }; __u64 user_data; /* data to be passed back at completion time */ + /* pack this to avoid bogus arm OABI complaints */ union { - struct { - /* pack this to avoid bogus arm OABI complaints */ - union { - /* index into fixed buffers, if used */ - __u16 buf_index; - /* for grouped buffer selection */ - __u16 buf_group; - } __attribute__((packed)); - /* personality to use, if used */ - __u16 personality; - __s32 splice_fd_in; - }; - __u64 __pad2[3]; + /* index into fixed buffers, if used */ + __u16 buf_index; + /* for grouped buffer selection */ + __u16 buf_group; + } __attribute__((packed)); + /* personality to use, if used */ + __u16 personality; + union { + __s32 splice_fd_in; + __u32 file_index; }; + __u64 __pad2[2]; }; enum { @@ -132,6 +134,9 @@ enum { IORING_OP_PROVIDE_BUFFERS, IORING_OP_REMOVE_BUFFERS, IORING_OP_TEE, + IORING_OP_SHUTDOWN, + IORING_OP_RENAMEAT, + IORING_OP_UNLINKAT, /* this goes last, obviously */ IORING_OP_LAST, @@ -145,14 +150,34 @@ enum { /* * sqe->timeout_flags */ -#define IORING_TIMEOUT_ABS (1U << 0) - +#define IORING_TIMEOUT_ABS (1U << 0) +#define IORING_TIMEOUT_UPDATE (1U << 1) +#define IORING_TIMEOUT_BOOTTIME (1U << 2) +#define IORING_TIMEOUT_REALTIME (1U << 3) +#define IORING_LINK_TIMEOUT_UPDATE (1U << 4) +#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) +#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | 
IORING_LINK_TIMEOUT_UPDATE) /* * sqe->splice_flags * extends splice(2) flags */ #define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */ +/* + * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the + * command flags for POLL_ADD are stored in sqe->len. + * + * IORING_POLL_ADD_MULTI Multishot poll. Sets IORING_CQE_F_MORE if + * the poll handler will continue to report + * CQEs on behalf of the same SQE. + * + * IORING_POLL_UPDATE Update existing poll request, matching + * sqe->addr as the old user_data field. + */ +#define IORING_POLL_ADD_MULTI (1U << 0) +#define IORING_POLL_UPDATE_EVENTS (1U << 1) +#define IORING_POLL_UPDATE_USER_DATA (1U << 2) + /* * IO completion data structure (Completion Queue Entry) */ @@ -166,8 +191,10 @@ struct io_uring_cqe { * cqe->flags * * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID + * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries */ #define IORING_CQE_F_BUFFER (1U << 0) +#define IORING_CQE_F_MORE (1U << 1) enum { IORING_CQE_BUFFER_SHIFT = 16, @@ -226,6 +253,7 @@ struct io_cqring_offsets { #define IORING_ENTER_GETEVENTS (1U << 0) #define IORING_ENTER_SQ_WAKEUP (1U << 1) #define IORING_ENTER_SQ_WAIT (1U << 2) +#define IORING_ENTER_EXT_ARG (1U << 3) /* * Passed in for io_uring_setup(2). 
Copied back with updated info on success @@ -253,6 +281,10 @@ struct io_uring_params { #define IORING_FEAT_CUR_PERSONALITY (1U << 4) #define IORING_FEAT_FAST_POLL (1U << 5) #define IORING_FEAT_POLL_32BITS (1U << 6) +#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7) +#define IORING_FEAT_EXT_ARG (1U << 8) +#define IORING_FEAT_NATIVE_WORKERS (1U << 9) +#define IORING_FEAT_RSRC_TAGS (1U << 10) /* * io_uring_register(2) opcodes and arguments @@ -272,16 +304,62 @@ enum { IORING_REGISTER_RESTRICTIONS = 11, IORING_REGISTER_ENABLE_RINGS = 12, + /* extended with tagging */ + IORING_REGISTER_FILES2 = 13, + IORING_REGISTER_FILES_UPDATE2 = 14, + IORING_REGISTER_BUFFERS2 = 15, + IORING_REGISTER_BUFFERS_UPDATE = 16, + + /* set/clear io-wq thread affinities */ + IORING_REGISTER_IOWQ_AFF = 17, + IORING_UNREGISTER_IOWQ_AFF = 18, + + /* set/get max number of io-wq workers */ + IORING_REGISTER_IOWQ_MAX_WORKERS = 19, + /* this goes last */ IORING_REGISTER_LAST }; +/* io-wq worker categories */ +enum { + IO_WQ_BOUND, + IO_WQ_UNBOUND, +}; + +/* deprecated, see struct io_uring_rsrc_update */ struct io_uring_files_update { __u32 offset; __u32 resv; __aligned_u64 /* __s32 * */ fds; }; +struct io_uring_rsrc_register { + __u32 nr; + __u32 resv; + __u64 resv2; + __aligned_u64 data; + __aligned_u64 tags; +}; + +struct io_uring_rsrc_update { + __u32 offset; + __u32 resv; + __aligned_u64 data; +}; + +struct io_uring_rsrc_update2 { + __u32 offset; + __u32 resv; + __aligned_u64 data; + __aligned_u64 tags; + __u32 nr; + __u32 resv2; +}; + +/* Skip updating fd indexes set to this value in the fd table */ +#define IORING_REGISTER_FILES_SKIP (-2) + #define IO_URING_OP_SUPPORTED (1U << 0) struct io_uring_probe_op { @@ -329,4 +407,11 @@ enum { IORING_RESTRICTION_LAST }; +struct io_uring_getevents_arg { + __u64 sigmask; + __u32 sigmask_sz; + __u32 pad; + __u64 ts; +}; + #endif diff --git a/io_uring/Makefile b/io_uring/Makefile new file mode 100644 index 000000000000..3680425df947 --- /dev/null +++ 
b/io_uring/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for io_uring + +obj-$(CONFIG_IO_URING) += io_uring.o +obj-$(CONFIG_IO_WQ) += io-wq.o diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c new file mode 100644 index 000000000000..6031fb319d87 --- /dev/null +++ b/io_uring/io-wq.c @@ -0,0 +1,1398 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Basic worker thread pool for io_uring + * + * Copyright (C) 2019 Jens Axboe + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "io-wq.h" + +#define WORKER_IDLE_TIMEOUT (5 * HZ) + +enum { + IO_WORKER_F_UP = 1, /* up and active */ + IO_WORKER_F_RUNNING = 2, /* account as running */ + IO_WORKER_F_FREE = 4, /* worker on free list */ + IO_WORKER_F_BOUND = 8, /* is doing bounded work */ +}; + +enum { + IO_WQ_BIT_EXIT = 0, /* wq exiting */ +}; + +enum { + IO_ACCT_STALLED_BIT = 0, /* stalled on hash */ +}; + +/* + * One for each thread in a wqe pool + */ +struct io_worker { + refcount_t ref; + unsigned flags; + struct hlist_nulls_node nulls_node; + struct list_head all_list; + struct task_struct *task; + struct io_wqe *wqe; + + struct io_wq_work *cur_work; + spinlock_t lock; + + struct completion ref_done; + + unsigned long create_state; + struct callback_head create_work; + int create_index; + + union { + struct rcu_head rcu; + struct work_struct work; + }; +}; + +#if BITS_PER_LONG == 64 +#define IO_WQ_HASH_ORDER 6 +#else +#define IO_WQ_HASH_ORDER 5 +#endif + +#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER) + +struct io_wqe_acct { + unsigned nr_workers; + unsigned max_workers; + int index; + atomic_t nr_running; + struct io_wq_work_list work_list; + unsigned long flags; +}; + +enum { + IO_WQ_ACCT_BOUND, + IO_WQ_ACCT_UNBOUND, + IO_WQ_ACCT_NR, +}; + +/* + * Per-node worker thread pool + */ +struct io_wqe { + raw_spinlock_t lock; + struct io_wqe_acct acct[2]; + + int node; + + struct hlist_nulls_head free_list; + struct list_head 
all_list; + + struct wait_queue_entry wait; + + struct io_wq *wq; + struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS]; + + cpumask_var_t cpu_mask; +}; + +/* + * Per io_wq state + */ +struct io_wq { + unsigned long state; + + free_work_fn *free_work; + io_wq_work_fn *do_work; + + struct io_wq_hash *hash; + + atomic_t worker_refs; + struct completion worker_done; + + struct hlist_node cpuhp_node; + + struct task_struct *task; + + struct io_wqe *wqes[]; +}; + +static enum cpuhp_state io_wq_online; + +struct io_cb_cancel_data { + work_cancel_fn *fn; + void *data; + int nr_running; + int nr_pending; + bool cancel_all; +}; + +static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); +static void io_wqe_dec_running(struct io_worker *worker); +static bool io_acct_cancel_pending_work(struct io_wqe *wqe, + struct io_wqe_acct *acct, + struct io_cb_cancel_data *match); +static void create_worker_cb(struct callback_head *cb); +static void io_wq_cancel_tw_create(struct io_wq *wq); + +static bool io_worker_get(struct io_worker *worker) +{ + return refcount_inc_not_zero(&worker->ref); +} + +static void io_worker_release(struct io_worker *worker) +{ + if (refcount_dec_and_test(&worker->ref)) + complete(&worker->ref_done); +} + +static inline struct io_wqe_acct *io_get_acct(struct io_wqe *wqe, bool bound) +{ + return &wqe->acct[bound ? 
IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND]; +} + +static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe, + struct io_wq_work *work) +{ + return io_get_acct(wqe, !(work->flags & IO_WQ_WORK_UNBOUND)); +} + +static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker) +{ + return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND); +} + +static void io_worker_ref_put(struct io_wq *wq) +{ + if (atomic_dec_and_test(&wq->worker_refs)) + complete(&wq->worker_done); +} + +static void io_worker_cancel_cb(struct io_worker *worker) +{ + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = wqe->wq; + + atomic_dec(&acct->nr_running); + raw_spin_lock(&worker->wqe->lock); + acct->nr_workers--; + raw_spin_unlock(&worker->wqe->lock); + io_worker_ref_put(wq); + clear_bit_unlock(0, &worker->create_state); + io_worker_release(worker); +} + +static bool io_task_worker_match(struct callback_head *cb, void *data) +{ + struct io_worker *worker; + + if (cb->func != create_worker_cb) + return false; + worker = container_of(cb, struct io_worker, create_work); + return worker == data; +} + +static void io_worker_exit(struct io_worker *worker) +{ + struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = wqe->wq; + + while (1) { + struct callback_head *cb = task_work_cancel_match(wq->task, + io_task_worker_match, worker); + + if (!cb) + break; + io_worker_cancel_cb(worker); + } + + if (refcount_dec_and_test(&worker->ref)) + complete(&worker->ref_done); + wait_for_completion(&worker->ref_done); + + raw_spin_lock(&wqe->lock); + if (worker->flags & IO_WORKER_F_FREE) + hlist_nulls_del_rcu(&worker->nulls_node); + list_del_rcu(&worker->all_list); + preempt_disable(); + io_wqe_dec_running(worker); + worker->flags = 0; + current->flags &= ~PF_IO_WORKER; + preempt_enable(); + raw_spin_unlock(&wqe->lock); + + kfree_rcu(worker, rcu); + io_worker_ref_put(wqe->wq); + do_exit(0); +} + +static inline bool 
io_acct_run_queue(struct io_wqe_acct *acct) +{ + if (!wq_list_empty(&acct->work_list) && + !test_bit(IO_ACCT_STALLED_BIT, &acct->flags)) + return true; + return false; +} + +/* + * Check head of free list for an available worker. If one isn't available, + * caller must create one. + */ +static bool io_wqe_activate_free_worker(struct io_wqe *wqe, + struct io_wqe_acct *acct) + __must_hold(RCU) +{ + struct hlist_nulls_node *n; + struct io_worker *worker; + + /* + * Iterate free_list and see if we can find an idle worker to + * activate. If a given worker is on the free_list but in the process + * of exiting, keep trying. + */ + hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) { + if (!io_worker_get(worker)) + continue; + if (io_wqe_get_acct(worker) != acct) { + io_worker_release(worker); + continue; + } + if (wake_up_process(worker->task)) { + io_worker_release(worker); + return true; + } + io_worker_release(worker); + } + + return false; +} + +/* + * We need a worker. If we find a free one, we're good. If not, and we're + * below the max number of workers, create one. + */ +static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) +{ + /* + * Most likely an attempt to queue unbounded work on an io_wq that + * wasn't setup with any unbounded workers. 
+ */ + if (unlikely(!acct->max_workers)) + pr_warn_once("io-wq is not configured for unbound workers"); + + raw_spin_lock(&wqe->lock); + if (acct->nr_workers >= acct->max_workers) { + raw_spin_unlock(&wqe->lock); + return true; + } + acct->nr_workers++; + raw_spin_unlock(&wqe->lock); + atomic_inc(&acct->nr_running); + atomic_inc(&wqe->wq->worker_refs); + return create_io_worker(wqe->wq, wqe, acct->index); +} + +static void io_wqe_inc_running(struct io_worker *worker) +{ + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + + atomic_inc(&acct->nr_running); +} + +static void create_worker_cb(struct callback_head *cb) +{ + struct io_worker *worker; + struct io_wq *wq; + struct io_wqe *wqe; + struct io_wqe_acct *acct; + bool do_create = false; + + worker = container_of(cb, struct io_worker, create_work); + wqe = worker->wqe; + wq = wqe->wq; + acct = &wqe->acct[worker->create_index]; + raw_spin_lock(&wqe->lock); + if (acct->nr_workers < acct->max_workers) { + acct->nr_workers++; + do_create = true; + } + raw_spin_unlock(&wqe->lock); + if (do_create) { + create_io_worker(wq, wqe, worker->create_index); + } else { + atomic_dec(&acct->nr_running); + io_worker_ref_put(wq); + } + clear_bit_unlock(0, &worker->create_state); + io_worker_release(worker); +} + +static bool io_queue_worker_create(struct io_worker *worker, + struct io_wqe_acct *acct, + task_work_func_t func) +{ + struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = wqe->wq; + + /* raced with exit, just ignore create call */ + if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) + goto fail; + if (!io_worker_get(worker)) + goto fail; + /* + * create_state manages ownership of create_work/index. We should + * only need one entry per worker, as the worker going to sleep + * will trigger the condition, and waking will clear it once it + * runs the task_work. 
+ */ + if (test_bit(0, &worker->create_state) || + test_and_set_bit_lock(0, &worker->create_state)) + goto fail_release; + + atomic_inc(&wq->worker_refs); + init_task_work(&worker->create_work, func); + worker->create_index = acct->index; + if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) { + /* + * EXIT may have been set after checking it above, check after + * adding the task_work and remove any creation item if it is + * now set. wq exit does that too, but we can have added this + * work item after we canceled in io_wq_exit_workers(). + */ + if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) + io_wq_cancel_tw_create(wq); + io_worker_ref_put(wq); + return true; + } + io_worker_ref_put(wq); + clear_bit_unlock(0, &worker->create_state); +fail_release: + io_worker_release(worker); +fail: + atomic_dec(&acct->nr_running); + io_worker_ref_put(wq); + return false; +} + +static void io_wqe_dec_running(struct io_worker *worker) + __must_hold(wqe->lock) +{ + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wqe *wqe = worker->wqe; + + if (!(worker->flags & IO_WORKER_F_UP)) + return; + + if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) { + atomic_inc(&acct->nr_running); + atomic_inc(&wqe->wq->worker_refs); + raw_spin_unlock(&wqe->lock); + io_queue_worker_create(worker, acct, create_worker_cb); + raw_spin_lock(&wqe->lock); + } +} + +/* + * Worker will start processing some work. Move it to the busy list, if + * it's currently on the freelist + */ +static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, + struct io_wq_work *work) + __must_hold(wqe->lock) +{ + if (worker->flags & IO_WORKER_F_FREE) { + worker->flags &= ~IO_WORKER_F_FREE; + hlist_nulls_del_init_rcu(&worker->nulls_node); + } +} + +/* + * No work, worker going to sleep. Move to freelist, and unuse mm if we + * have one attached. Dropping the mm may potentially sleep, so we drop + * the lock in that case and return success. 
Since the caller has to + * retry the loop in that case (we changed task state), we don't regrab + * the lock if we return success. + */ +static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) + __must_hold(wqe->lock) +{ + if (!(worker->flags & IO_WORKER_F_FREE)) { + worker->flags |= IO_WORKER_F_FREE; + hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); + } +} + +static inline unsigned int io_get_work_hash(struct io_wq_work *work) +{ + return work->flags >> IO_WQ_HASH_SHIFT; +} + +static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) +{ + struct io_wq *wq = wqe->wq; + bool ret = false; + + spin_lock_irq(&wq->hash->wait.lock); + if (list_empty(&wqe->wait.entry)) { + __add_wait_queue(&wq->hash->wait, &wqe->wait); + if (!test_bit(hash, &wq->hash->map)) { + __set_current_state(TASK_RUNNING); + list_del_init(&wqe->wait.entry); + ret = true; + } + } + spin_unlock_irq(&wq->hash->wait.lock); + return ret; +} + +static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, + struct io_worker *worker) + __must_hold(wqe->lock) +{ + struct io_wq_work_node *node, *prev; + struct io_wq_work *work, *tail; + unsigned int stall_hash = -1U; + struct io_wqe *wqe = worker->wqe; + + wq_list_for_each(node, prev, &acct->work_list) { + unsigned int hash; + + work = container_of(node, struct io_wq_work, list); + + /* not hashed, can run anytime */ + if (!io_wq_is_hashed(work)) { + wq_list_del(&acct->work_list, node, prev); + return work; + } + + hash = io_get_work_hash(work); + /* all items with this hash lie in [work, tail] */ + tail = wqe->hash_tail[hash]; + + /* hashed, can run if not already running */ + if (!test_and_set_bit(hash, &wqe->wq->hash->map)) { + wqe->hash_tail[hash] = NULL; + wq_list_cut(&acct->work_list, &tail->list, prev); + return work; + } + if (stall_hash == -1U) + stall_hash = hash; + /* fast forward to a next hash, for-each will fix up @prev */ + node = &tail->list; + } + + if (stall_hash != -1U) { + bool 
unstalled; + + /* + * Set this before dropping the lock to avoid racing with new + * work being added and clearing the stalled bit. + */ + set_bit(IO_ACCT_STALLED_BIT, &acct->flags); + raw_spin_unlock(&wqe->lock); + unstalled = io_wait_on_hash(wqe, stall_hash); + raw_spin_lock(&wqe->lock); + if (unstalled) { + clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); + if (wq_has_sleeper(&wqe->wq->hash->wait)) + wake_up(&wqe->wq->hash->wait); + } + } + + return NULL; +} + +static bool io_flush_signals(void) +{ + if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) { + __set_current_state(TASK_RUNNING); + tracehook_notify_signal(); + return true; + } + return false; +} + +static void io_assign_current_work(struct io_worker *worker, + struct io_wq_work *work) +{ + if (work) { + io_flush_signals(); + cond_resched(); + } + + spin_lock(&worker->lock); + worker->cur_work = work; + spin_unlock(&worker->lock); +} + +static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work); + +static void io_worker_handle_work(struct io_worker *worker) + __releases(wqe->lock) +{ + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = wqe->wq; + bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state); + + do { + struct io_wq_work *work; +get_next: + /* + * If we got some work, mark us as busy. If we didn't, but + * the list isn't empty, it means we stalled on hashed work. + * Mark us stalled so we don't keep looking for work when we + * can't make progress, any work completion or insertion will + * clear the stalled flag. 
+ */ + work = io_get_next_work(acct, worker); + if (work) + __io_worker_busy(wqe, worker, work); + + raw_spin_unlock(&wqe->lock); + if (!work) + break; + io_assign_current_work(worker, work); + __set_current_state(TASK_RUNNING); + + /* handle a whole dependent link */ + do { + struct io_wq_work *next_hashed, *linked; + unsigned int hash = io_get_work_hash(work); + + next_hashed = wq_next_work(work); + + if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND)) + work->flags |= IO_WQ_WORK_CANCEL; + wq->do_work(work); + io_assign_current_work(worker, NULL); + + linked = wq->free_work(work); + work = next_hashed; + if (!work && linked && !io_wq_is_hashed(linked)) { + work = linked; + linked = NULL; + } + io_assign_current_work(worker, work); + if (linked) + io_wqe_enqueue(wqe, linked); + + if (hash != -1U && !next_hashed) { + /* serialize hash clear with wake_up() */ + spin_lock_irq(&wq->hash->wait.lock); + clear_bit(hash, &wq->hash->map); + clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); + spin_unlock_irq(&wq->hash->wait.lock); + if (wq_has_sleeper(&wq->hash->wait)) + wake_up(&wq->hash->wait); + raw_spin_lock(&wqe->lock); + /* skip unnecessary unlock-lock wqe->lock */ + if (!work) + goto get_next; + raw_spin_unlock(&wqe->lock); + } + } while (work); + + raw_spin_lock(&wqe->lock); + } while (1); +} + +static int io_wqe_worker(void *data) +{ + struct io_worker *worker = data; + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = wqe->wq; + bool last_timeout = false; + char buf[TASK_COMM_LEN]; + + worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); + + snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid); + set_task_comm(current, buf); + + while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { + long ret; + + set_current_state(TASK_INTERRUPTIBLE); +loop: + raw_spin_lock(&wqe->lock); + if (io_acct_run_queue(acct)) { + io_worker_handle_work(worker); + goto loop; + } + /* timed out, exit unless we're the last 
worker */ + if (last_timeout && acct->nr_workers > 1) { + acct->nr_workers--; + raw_spin_unlock(&wqe->lock); + __set_current_state(TASK_RUNNING); + break; + } + last_timeout = false; + __io_worker_idle(wqe, worker); + raw_spin_unlock(&wqe->lock); + if (io_flush_signals()) + continue; + ret = schedule_timeout(WORKER_IDLE_TIMEOUT); + if (signal_pending(current)) { + struct ksignal ksig; + + if (!get_signal(&ksig)) + continue; + break; + } + last_timeout = !ret; + } + + if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) { + raw_spin_lock(&wqe->lock); + io_worker_handle_work(worker); + } + + io_worker_exit(worker); + return 0; +} + +/* + * Called when a worker is scheduled in. Mark us as currently running. + */ +void io_wq_worker_running(struct task_struct *tsk) +{ + struct io_worker *worker = tsk->pf_io_worker; + + if (!worker) + return; + if (!(worker->flags & IO_WORKER_F_UP)) + return; + if (worker->flags & IO_WORKER_F_RUNNING) + return; + worker->flags |= IO_WORKER_F_RUNNING; + io_wqe_inc_running(worker); +} + +/* + * Called when worker is going to sleep. If there are no workers currently + * running and we have work pending, wake up a free one or create a new one. 
+ */ +void io_wq_worker_sleeping(struct task_struct *tsk) +{ + struct io_worker *worker = tsk->pf_io_worker; + + if (!worker) + return; + if (!(worker->flags & IO_WORKER_F_UP)) + return; + if (!(worker->flags & IO_WORKER_F_RUNNING)) + return; + + worker->flags &= ~IO_WORKER_F_RUNNING; + + raw_spin_lock(&worker->wqe->lock); + io_wqe_dec_running(worker); + raw_spin_unlock(&worker->wqe->lock); +} + +static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker, + struct task_struct *tsk) +{ + tsk->pf_io_worker = worker; + worker->task = tsk; + set_cpus_allowed_ptr(tsk, wqe->cpu_mask); + tsk->flags |= PF_NO_SETAFFINITY; + + raw_spin_lock(&wqe->lock); + hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); + list_add_tail_rcu(&worker->all_list, &wqe->all_list); + worker->flags |= IO_WORKER_F_FREE; + raw_spin_unlock(&wqe->lock); + wake_up_new_task(tsk); +} + +static bool io_wq_work_match_all(struct io_wq_work *work, void *data) +{ + return true; +} + +static inline bool io_should_retry_thread(long err) +{ + /* + * Prevent perpetual task_work retry, if the task (or its group) is + * exiting. 
+ */ + if (fatal_signal_pending(current)) + return false; + + switch (err) { + case -EAGAIN: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + return true; + default: + return false; + } +} + +static void create_worker_cont(struct callback_head *cb) +{ + struct io_worker *worker; + struct task_struct *tsk; + struct io_wqe *wqe; + + worker = container_of(cb, struct io_worker, create_work); + clear_bit_unlock(0, &worker->create_state); + wqe = worker->wqe; + tsk = create_io_thread(io_wqe_worker, worker, wqe->node); + if (!IS_ERR(tsk)) { + io_init_new_worker(wqe, worker, tsk); + io_worker_release(worker); + return; + } else if (!io_should_retry_thread(PTR_ERR(tsk))) { + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + + atomic_dec(&acct->nr_running); + raw_spin_lock(&wqe->lock); + acct->nr_workers--; + if (!acct->nr_workers) { + struct io_cb_cancel_data match = { + .fn = io_wq_work_match_all, + .cancel_all = true, + }; + + while (io_acct_cancel_pending_work(wqe, acct, &match)) + raw_spin_lock(&wqe->lock); + } + raw_spin_unlock(&wqe->lock); + io_worker_ref_put(wqe->wq); + kfree(worker); + return; + } + + /* re-create attempts grab a new worker ref, drop the existing one */ + io_worker_release(worker); + schedule_work(&worker->work); +} + +static void io_workqueue_create(struct work_struct *work) +{ + struct io_worker *worker = container_of(work, struct io_worker, work); + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + + if (!io_queue_worker_create(worker, acct, create_worker_cont)) + kfree(worker); +} + +static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) +{ + struct io_wqe_acct *acct = &wqe->acct[index]; + struct io_worker *worker; + struct task_struct *tsk; + + __set_current_state(TASK_RUNNING); + + worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node); + if (!worker) { +fail: + atomic_dec(&acct->nr_running); + raw_spin_lock(&wqe->lock); + acct->nr_workers--; + raw_spin_unlock(&wqe->lock); + 
io_worker_ref_put(wq); + return false; + } + + refcount_set(&worker->ref, 1); + worker->wqe = wqe; + spin_lock_init(&worker->lock); + init_completion(&worker->ref_done); + + if (index == IO_WQ_ACCT_BOUND) + worker->flags |= IO_WORKER_F_BOUND; + + tsk = create_io_thread(io_wqe_worker, worker, wqe->node); + if (!IS_ERR(tsk)) { + io_init_new_worker(wqe, worker, tsk); + } else if (!io_should_retry_thread(PTR_ERR(tsk))) { + kfree(worker); + goto fail; + } else { + INIT_WORK(&worker->work, io_workqueue_create); + schedule_work(&worker->work); + } + + return true; +} + +/* + * Iterate the passed in list and call the specific function for each + * worker that isn't exiting + */ +static bool io_wq_for_each_worker(struct io_wqe *wqe, + bool (*func)(struct io_worker *, void *), + void *data) +{ + struct io_worker *worker; + bool ret = false; + + list_for_each_entry_rcu(worker, &wqe->all_list, all_list) { + if (io_worker_get(worker)) { + /* no task if node is/was offline */ + if (worker->task) + ret = func(worker, data); + io_worker_release(worker); + if (ret) + break; + } + } + + return ret; +} + +static bool io_wq_worker_wake(struct io_worker *worker, void *data) +{ + set_notify_signal(worker->task); + wake_up_process(worker->task); + return false; +} + +static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) +{ + struct io_wq *wq = wqe->wq; + + do { + work->flags |= IO_WQ_WORK_CANCEL; + wq->do_work(work); + work = wq->free_work(work); + } while (work); +} + +static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work) +{ + struct io_wqe_acct *acct = io_work_get_acct(wqe, work); + unsigned int hash; + struct io_wq_work *tail; + + if (!io_wq_is_hashed(work)) { +append: + wq_list_add_tail(&work->list, &acct->work_list); + return; + } + + hash = io_get_work_hash(work); + tail = wqe->hash_tail[hash]; + wqe->hash_tail[hash] = work; + if (!tail) + goto append; + + wq_list_add_after(&work->list, &tail->list, &acct->work_list); +} + +static bool 
io_wq_work_match_item(struct io_wq_work *work, void *data) +{ + return work == data; +} + +static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) +{ + struct io_wqe_acct *acct = io_work_get_acct(wqe, work); + unsigned work_flags = work->flags; + bool do_create; + + /* + * If io-wq is exiting for this task, or if the request has explicitly + * been marked as one that should not get executed, cancel it here. + */ + if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || + (work->flags & IO_WQ_WORK_CANCEL)) { + io_run_cancel(work, wqe); + return; + } + + raw_spin_lock(&wqe->lock); + io_wqe_insert_work(wqe, work); + clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); + + rcu_read_lock(); + do_create = !io_wqe_activate_free_worker(wqe, acct); + rcu_read_unlock(); + + raw_spin_unlock(&wqe->lock); + + if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) || + !atomic_read(&acct->nr_running))) { + bool did_create; + + did_create = io_wqe_create_worker(wqe, acct); + if (likely(did_create)) + return; + + raw_spin_lock(&wqe->lock); + /* fatal condition, failed to create the first worker */ + if (!acct->nr_workers) { + struct io_cb_cancel_data match = { + .fn = io_wq_work_match_item, + .data = work, + .cancel_all = false, + }; + + if (io_acct_cancel_pending_work(wqe, acct, &match)) + raw_spin_lock(&wqe->lock); + } + raw_spin_unlock(&wqe->lock); + } +} + +void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) +{ + struct io_wqe *wqe = wq->wqes[numa_node_id()]; + + io_wqe_enqueue(wqe, work); +} + +/* + * Work items that hash to the same value will not be done in parallel. + * Used to limit concurrent writes, generally hashed by inode. 
+ */ +void io_wq_hash_work(struct io_wq_work *work, void *val) +{ + unsigned int bit; + + bit = hash_ptr(val, IO_WQ_HASH_ORDER); + work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); +} + +static bool io_wq_worker_cancel(struct io_worker *worker, void *data) +{ + struct io_cb_cancel_data *match = data; + + /* + * Hold the lock to avoid ->cur_work going out of scope, caller + * may dereference the passed in work. + */ + spin_lock(&worker->lock); + if (worker->cur_work && + match->fn(worker->cur_work, match->data)) { + set_notify_signal(worker->task); + match->nr_running++; + } + spin_unlock(&worker->lock); + + return match->nr_running && !match->cancel_all; +} + +static inline void io_wqe_remove_pending(struct io_wqe *wqe, + struct io_wq_work *work, + struct io_wq_work_node *prev) +{ + struct io_wqe_acct *acct = io_work_get_acct(wqe, work); + unsigned int hash = io_get_work_hash(work); + struct io_wq_work *prev_work = NULL; + + if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) { + if (prev) + prev_work = container_of(prev, struct io_wq_work, list); + if (prev_work && io_get_work_hash(prev_work) == hash) + wqe->hash_tail[hash] = prev_work; + else + wqe->hash_tail[hash] = NULL; + } + wq_list_del(&acct->work_list, &work->list, prev); +} + +static bool io_acct_cancel_pending_work(struct io_wqe *wqe, + struct io_wqe_acct *acct, + struct io_cb_cancel_data *match) + __releases(wqe->lock) +{ + struct io_wq_work_node *node, *prev; + struct io_wq_work *work; + + wq_list_for_each(node, prev, &acct->work_list) { + work = container_of(node, struct io_wq_work, list); + if (!match->fn(work, match->data)) + continue; + io_wqe_remove_pending(wqe, work, prev); + raw_spin_unlock(&wqe->lock); + io_run_cancel(work, wqe); + match->nr_pending++; + /* not safe to continue after unlock */ + return true; + } + + return false; +} + +static void io_wqe_cancel_pending_work(struct io_wqe *wqe, + struct io_cb_cancel_data *match) +{ + int i; +retry: + 
raw_spin_lock(&wqe->lock); + for (i = 0; i < IO_WQ_ACCT_NR; i++) { + struct io_wqe_acct *acct = io_get_acct(wqe, i == 0); + + if (io_acct_cancel_pending_work(wqe, acct, match)) { + if (match->cancel_all) + goto retry; + return; + } + } + raw_spin_unlock(&wqe->lock); +} + +static void io_wqe_cancel_running_work(struct io_wqe *wqe, + struct io_cb_cancel_data *match) +{ + rcu_read_lock(); + io_wq_for_each_worker(wqe, io_wq_worker_cancel, match); + rcu_read_unlock(); +} + +enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, + void *data, bool cancel_all) +{ + struct io_cb_cancel_data match = { + .fn = cancel, + .data = data, + .cancel_all = cancel_all, + }; + int node; + + /* + * First check pending list, if we're lucky we can just remove it + * from there. CANCEL_OK means that the work is returned as-new, + * no completion will be posted for it. + */ + for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; + + io_wqe_cancel_pending_work(wqe, &match); + if (match.nr_pending && !match.cancel_all) + return IO_WQ_CANCEL_OK; + } + + /* + * Now check if a free (going busy) or busy worker has the work + * currently running. If we find it there, we'll return CANCEL_RUNNING + * as an indication that we attempt to signal cancellation. The + * completion will run normally in this case. 
+ */ + for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; + + io_wqe_cancel_running_work(wqe, &match); + if (match.nr_running && !match.cancel_all) + return IO_WQ_CANCEL_RUNNING; + } + + if (match.nr_running) + return IO_WQ_CANCEL_RUNNING; + if (match.nr_pending) + return IO_WQ_CANCEL_OK; + return IO_WQ_CANCEL_NOTFOUND; +} + +static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode, + int sync, void *key) +{ + struct io_wqe *wqe = container_of(wait, struct io_wqe, wait); + int i; + + list_del_init(&wait->entry); + + rcu_read_lock(); + for (i = 0; i < IO_WQ_ACCT_NR; i++) { + struct io_wqe_acct *acct = &wqe->acct[i]; + + if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags)) + io_wqe_activate_free_worker(wqe, acct); + } + rcu_read_unlock(); + return 1; +} + +struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) +{ + int ret, node, i; + struct io_wq *wq; + + if (WARN_ON_ONCE(!data->free_work || !data->do_work)) + return ERR_PTR(-EINVAL); + if (WARN_ON_ONCE(!bounded)) + return ERR_PTR(-EINVAL); + + wq = kzalloc(struct_size(wq, wqes, nr_node_ids), GFP_KERNEL); + if (!wq) + return ERR_PTR(-ENOMEM); + ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); + if (ret) + goto err_wq; + + refcount_inc(&data->hash->refs); + wq->hash = data->hash; + wq->free_work = data->free_work; + wq->do_work = data->do_work; + + ret = -ENOMEM; + for_each_node(node) { + struct io_wqe *wqe; + int alloc_node = node; + + if (!node_online(alloc_node)) + alloc_node = NUMA_NO_NODE; + wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node); + if (!wqe) + goto err; + wq->wqes[node] = wqe; + if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL)) + goto err; + cpumask_copy(wqe->cpu_mask, cpumask_of_node(node)); + wqe->node = alloc_node; + wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; + wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers = + task_rlimit(current, RLIMIT_NPROC); + INIT_LIST_HEAD(&wqe->wait.entry); + wqe->wait.func = 
io_wqe_hash_wake; + for (i = 0; i < IO_WQ_ACCT_NR; i++) { + struct io_wqe_acct *acct = &wqe->acct[i]; + + acct->index = i; + atomic_set(&acct->nr_running, 0); + INIT_WQ_LIST(&acct->work_list); + } + wqe->wq = wq; + raw_spin_lock_init(&wqe->lock); + INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); + INIT_LIST_HEAD(&wqe->all_list); + } + + wq->task = get_task_struct(data->task); + atomic_set(&wq->worker_refs, 1); + init_completion(&wq->worker_done); + return wq; +err: + io_wq_put_hash(data->hash); + cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); + for_each_node(node) { + if (!wq->wqes[node]) + continue; + free_cpumask_var(wq->wqes[node]->cpu_mask); + kfree(wq->wqes[node]); + } +err_wq: + kfree(wq); + return ERR_PTR(ret); +} + +static bool io_task_work_match(struct callback_head *cb, void *data) +{ + struct io_worker *worker; + + if (cb->func != create_worker_cb && cb->func != create_worker_cont) + return false; + worker = container_of(cb, struct io_worker, create_work); + return worker->wqe->wq == data; +} + +void io_wq_exit_start(struct io_wq *wq) +{ + set_bit(IO_WQ_BIT_EXIT, &wq->state); +} + +static void io_wq_cancel_tw_create(struct io_wq *wq) +{ + struct callback_head *cb; + + while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { + struct io_worker *worker; + + worker = container_of(cb, struct io_worker, create_work); + io_worker_cancel_cb(worker); + } +} + +static void io_wq_exit_workers(struct io_wq *wq) +{ + int node; + + if (!wq->task) + return; + + io_wq_cancel_tw_create(wq); + + rcu_read_lock(); + for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; + + io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL); + } + rcu_read_unlock(); + io_worker_ref_put(wq); + wait_for_completion(&wq->worker_done); + + for_each_node(node) { + spin_lock_irq(&wq->hash->wait.lock); + list_del_init(&wq->wqes[node]->wait.entry); + spin_unlock_irq(&wq->hash->wait.lock); + } + put_task_struct(wq->task); + wq->task = NULL; +} + 
+static void io_wq_destroy(struct io_wq *wq) +{ + int node; + + cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); + + for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; + struct io_cb_cancel_data match = { + .fn = io_wq_work_match_all, + .cancel_all = true, + }; + io_wqe_cancel_pending_work(wqe, &match); + free_cpumask_var(wqe->cpu_mask); + kfree(wqe); + } + io_wq_put_hash(wq->hash); + kfree(wq); +} + +void io_wq_put_and_exit(struct io_wq *wq) +{ + WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state)); + + io_wq_exit_workers(wq); + io_wq_destroy(wq); +} + +struct online_data { + unsigned int cpu; + bool online; +}; + +static bool io_wq_worker_affinity(struct io_worker *worker, void *data) +{ + struct online_data *od = data; + + if (od->online) + cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask); + else + cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask); + return false; +} + +static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online) +{ + struct online_data od = { + .cpu = cpu, + .online = online + }; + int i; + + rcu_read_lock(); + for_each_node(i) + io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od); + rcu_read_unlock(); + return 0; +} + +static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); + + return __io_wq_cpu_online(wq, cpu, true); +} + +static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); + + return __io_wq_cpu_online(wq, cpu, false); +} + +int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask) +{ + int i; + + rcu_read_lock(); + for_each_node(i) { + struct io_wqe *wqe = wq->wqes[i]; + + if (mask) + cpumask_copy(wqe->cpu_mask, mask); + else + cpumask_copy(wqe->cpu_mask, cpumask_of_node(i)); + } + rcu_read_unlock(); + return 0; +} + +/* + * Set max number of unbounded workers, returns old value. 
If new_count is 0, + * then just return the old value. + */ +int io_wq_max_workers(struct io_wq *wq, int *new_count) +{ + int prev[IO_WQ_ACCT_NR]; + bool first_node = true; + int i, node; + + BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND); + BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND); + BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2); + + for (i = 0; i < 2; i++) { + if (new_count[i] > task_rlimit(current, RLIMIT_NPROC)) + new_count[i] = task_rlimit(current, RLIMIT_NPROC); + } + + for (i = 0; i < IO_WQ_ACCT_NR; i++) + prev[i] = 0; + + rcu_read_lock(); + for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; + struct io_wqe_acct *acct; + + raw_spin_lock(&wqe->lock); + for (i = 0; i < IO_WQ_ACCT_NR; i++) { + acct = &wqe->acct[i]; + if (first_node) + prev[i] = max_t(int, acct->max_workers, prev[i]); + if (new_count[i]) + acct->max_workers = new_count[i]; + } + raw_spin_unlock(&wqe->lock); + first_node = false; + } + rcu_read_unlock(); + + for (i = 0; i < IO_WQ_ACCT_NR; i++) + new_count[i] = prev[i]; + + return 0; +} + +static __init int io_wq_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online", + io_wq_cpu_online, io_wq_cpu_offline); + if (ret < 0) + return ret; + io_wq_online = ret; + return 0; +} +subsys_initcall(io_wq_init); diff --git a/fs/io-wq.h b/io_uring/io-wq.h similarity index 81% rename from fs/io-wq.h rename to io_uring/io-wq.h index 75113bcd5889..bf5c4c533760 100644 --- a/fs/io-wq.h +++ b/io_uring/io-wq.h @@ -1,7 +1,7 @@ #ifndef INTERNAL_IO_WQ_H #define INTERNAL_IO_WQ_H -#include +#include struct io_wq; @@ -9,16 +9,8 @@ enum { IO_WQ_WORK_CANCEL = 1, IO_WQ_WORK_HASHED = 2, IO_WQ_WORK_UNBOUND = 4, - IO_WQ_WORK_NO_CANCEL = 8, IO_WQ_WORK_CONCURRENT = 16, - IO_WQ_WORK_FILES = 32, - IO_WQ_WORK_FS = 64, - IO_WQ_WORK_MM = 128, - IO_WQ_WORK_CREDS = 256, - IO_WQ_WORK_BLKCG = 512, - IO_WQ_WORK_FSIZE = 1024, - IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ }; @@ -52,6 +44,7 @@ 
static inline void wq_list_add_after(struct io_wq_work_node *node, static inline void wq_list_add_tail(struct io_wq_work_node *node, struct io_wq_work_list *list) { + node->next = NULL; if (!list->first) { list->last = node; WRITE_ONCE(list->first, node); @@ -59,7 +52,6 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node, list->last->next = node; list->last = node; } - node->next = NULL; } static inline void wq_list_cut(struct io_wq_work_list *list, @@ -95,7 +87,6 @@ static inline void wq_list_del(struct io_wq_work_list *list, struct io_wq_work { struct io_wq_work_node list; - struct io_identity *identity; unsigned flags; }; @@ -107,37 +98,48 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) return container_of(work->list.next, struct io_wq_work, list); } -typedef void (free_work_fn)(struct io_wq_work *); -typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *); +typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *); +typedef void (io_wq_work_fn)(struct io_wq_work *); + +struct io_wq_hash { + refcount_t refs; + unsigned long map; + struct wait_queue_head wait; +}; + +static inline void io_wq_put_hash(struct io_wq_hash *hash) +{ + if (refcount_dec_and_test(&hash->refs)) + kfree(hash); +} struct io_wq_data { - struct user_struct *user; - + struct io_wq_hash *hash; + struct task_struct *task; io_wq_work_fn *do_work; free_work_fn *free_work; }; struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); -bool io_wq_get(struct io_wq *wq, struct io_wq_data *data); -void io_wq_destroy(struct io_wq *wq); +void io_wq_exit_start(struct io_wq *wq); +void io_wq_put_and_exit(struct io_wq *wq); void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); void io_wq_hash_work(struct io_wq_work *work, void *val); +int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask); +int io_wq_max_workers(struct io_wq *wq, int *new_count); + static inline bool io_wq_is_hashed(struct io_wq_work *work) { return 
work->flags & IO_WQ_WORK_HASHED; } -void io_wq_cancel_all(struct io_wq *wq); - typedef bool (work_cancel_fn)(struct io_wq_work *, void *); enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, void *data, bool cancel_all); -struct task_struct *io_wq_get_task(struct io_wq *wq); - #if defined(CONFIG_IO_WQ) extern void io_wq_worker_sleeping(struct task_struct *); extern void io_wq_worker_running(struct task_struct *); @@ -152,6 +154,7 @@ static inline void io_wq_worker_running(struct task_struct *tsk) static inline bool io_wq_current_is_worker(void) { - return in_task() && (current->flags & PF_IO_WORKER); + return in_task() && (current->flags & PF_IO_WORKER) && + current->pf_io_worker; } #endif diff --git a/fs/io_uring.c b/io_uring/io_uring.c similarity index 51% rename from fs/io_uring.c rename to io_uring/io_uring.c index 661303a435be..473dbd1830a3 100644 --- a/fs/io_uring.c +++ b/io_uring/io_uring.c @@ -11,7 +11,7 @@ * before writing the tail (using smp_load_acquire to read the tail will * do). It also needs a smp_mb() before updating CQ head (ordering the * entry load(s) with the head store), pairing with an implicit barrier - * through a control-dependency in io_get_cqring (smp_store_release to + * through a control-dependency in io_get_cqe (smp_store_release to * store head will do). Failure to do so could lead to reading invalid * CQ entries. 
* @@ -57,7 +57,6 @@ #include #include #include -#include #include #include #include @@ -75,35 +74,43 @@ #include #include #include -#include #include #include #include #include -#include -#include +#include #define CREATE_TRACE_POINTS #include #include -#include "internal.h" +#include "../fs/internal.h" #include "io-wq.h" #define IORING_MAX_ENTRIES 32768 #define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) +#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8 -/* - * Shift of 9 is 512 entries, or exactly one page on 64-bit archs - */ -#define IORING_FILE_TABLE_SHIFT 9 -#define IORING_MAX_FILES_TABLE (1U << IORING_FILE_TABLE_SHIFT) -#define IORING_FILE_TABLE_MASK (IORING_MAX_FILES_TABLE - 1) -#define IORING_MAX_FIXED_FILES (64 * IORING_MAX_FILES_TABLE) +/* only define max */ +#define IORING_MAX_FIXED_FILES (1U << 15) #define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ IORING_REGISTER_LAST + IORING_OP_LAST) +#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) +#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT) +#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1) + +#define IORING_MAX_REG_BUFFERS (1U << 14) + +#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ + IOSQE_IO_HARDLINK | IOSQE_ASYNC | \ + IOSQE_BUFFER_SELECT) +#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ + REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS) + +#define IO_TCTX_REFS_CACHE_NR (1U << 10) + struct io_uring { u32 head ____cacheline_aligned_in_smp; u32 tail ____cacheline_aligned_in_smp; @@ -162,7 +169,7 @@ struct io_rings { * Written by the application, shouldn't be modified by the * kernel. 
*/ - u32 cq_flags; + u32 cq_flags; /* * Number of completion events lost because the queue was full; * this should be avoided by the application by making sure @@ -187,36 +194,64 @@ struct io_rings { struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp; }; +enum io_uring_cmd_flags { + IO_URING_F_NONBLOCK = 1, + IO_URING_F_COMPLETE_DEFER = 2, +}; + struct io_mapped_ubuf { u64 ubuf; - size_t len; - struct bio_vec *bvec; + u64 ubuf_end; unsigned int nr_bvecs; unsigned long acct_pages; + struct bio_vec bvec[]; }; -struct fixed_file_table { - struct file **files; +struct io_ring_ctx; + +struct io_overflow_cqe { + struct io_uring_cqe cqe; + struct list_head list; }; -struct fixed_file_ref_node { +struct io_fixed_file { + /* file * with additional FFS_* flags */ + unsigned long file_ptr; +}; + +struct io_rsrc_put { + struct list_head list; + u64 tag; + union { + void *rsrc; + struct file *file; + struct io_mapped_ubuf *buf; + }; +}; + +struct io_file_table { + struct io_fixed_file *files; +}; + +struct io_rsrc_node { struct percpu_ref refs; struct list_head node; - struct list_head file_list; - struct fixed_file_data *file_data; + struct list_head rsrc_list; + struct io_rsrc_data *rsrc_data; struct llist_node llist; bool done; }; -struct fixed_file_data { - struct fixed_file_table *table; +typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc); + +struct io_rsrc_data { struct io_ring_ctx *ctx; - struct fixed_file_ref_node *node; - struct percpu_ref refs; + u64 **tags; + unsigned int nr; + rsrc_put_fn *do_put; + atomic_t refs; struct completion done; - struct list_head ref_list; - spinlock_t lock; bool quiesce; }; @@ -235,33 +270,81 @@ struct io_restriction { bool registered; }; +enum { + IO_SQ_THREAD_SHOULD_STOP = 0, + IO_SQ_THREAD_SHOULD_PARK, +}; + struct io_sq_data { refcount_t refs; + atomic_t park_pending; struct mutex lock; /* ctx's that are using this sqd */ struct list_head ctx_list; - struct list_head ctx_new_list; - struct mutex 
ctx_lock; struct task_struct *thread; struct wait_queue_head wait; + + unsigned sq_thread_idle; + int sq_cpu; + pid_t task_pid; + pid_t task_tgid; + + unsigned long state; + struct completion exited; +}; + +#define IO_COMPL_BATCH 32 +#define IO_REQ_CACHE_SIZE 32 +#define IO_REQ_ALLOC_BATCH 8 + +struct io_submit_link { + struct io_kiocb *head; + struct io_kiocb *last; +}; + +struct io_submit_state { + struct blk_plug plug; + struct io_submit_link link; + + /* + * io_kiocb alloc cache + */ + void *reqs[IO_REQ_CACHE_SIZE]; + unsigned int free_reqs; + + bool plug_started; + + /* + * Batch completion logic + */ + struct io_kiocb *compl_reqs[IO_COMPL_BATCH]; + unsigned int compl_nr; + /* inline/task_work completion list, under ->uring_lock */ + struct list_head free_list; + + unsigned int ios_left; }; struct io_ring_ctx { + /* const or read-mostly hot data */ struct { struct percpu_ref refs; - } ____cacheline_aligned_in_smp; - struct { + struct io_rings *rings; unsigned int flags; unsigned int compat: 1; - unsigned int limit_mem: 1; - unsigned int cq_overflow_flushed: 1; unsigned int drain_next: 1; unsigned int eventfd_async: 1; unsigned int restricted: 1; - unsigned int sqo_dead: 1; + unsigned int off_timeout_used: 1; + unsigned int drain_active: 1; + } ____cacheline_aligned_in_smp; + + /* submission data */ + struct { + struct mutex uring_lock; /* * Ring buffer of indices into array of io_uring_sqe, which is @@ -275,101 +358,59 @@ struct io_ring_ctx { * array. 
*/ u32 *sq_array; + struct io_uring_sqe *sq_sqes; unsigned cached_sq_head; unsigned sq_entries; - unsigned sq_mask; - unsigned sq_thread_idle; - unsigned cached_sq_dropped; - unsigned cached_cq_overflow; - unsigned long sq_check_overflow; - struct list_head defer_list; - struct list_head timeout_list; - struct list_head cq_overflow_list; - struct io_uring_sqe *sq_sqes; + /* + * Fixed resources fast path, should be accessed only under + * uring_lock, and updated through io_uring_register(2) + */ + struct io_rsrc_node *rsrc_node; + struct io_file_table file_table; + unsigned nr_user_files; + unsigned nr_user_bufs; + struct io_mapped_ubuf **user_bufs; + + struct io_submit_state submit_state; + struct list_head timeout_list; + struct list_head ltimeout_list; + struct list_head cq_overflow_list; + struct xarray io_buffers; + struct xarray personalities; + u32 pers_next; + unsigned sq_thread_idle; } ____cacheline_aligned_in_smp; - struct io_rings *rings; - - /* IO offload */ - struct io_wq *io_wq; - - /* - * For SQPOLL usage - we hold a reference to the parent task, so we - * have access to the ->files - */ - struct task_struct *sqo_task; - - /* Only used for accounting purposes */ - struct mm_struct *mm_account; - -#ifdef CONFIG_BLK_CGROUP - struct cgroup_subsys_state *sqo_blkcg_css; -#endif + /* IRQ completion list, under ->completion_lock */ + struct list_head locked_free_list; + unsigned int locked_free_nr; + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ struct io_sq_data *sq_data; /* if using sq thread polling */ struct wait_queue_head sqo_sq_wait; - struct wait_queue_entry sqo_wait_entry; struct list_head sqd_list; - /* - * If used, fixed file set. Writers must ensure that ->refs is dead, - * readers must ensure that ->refs is alive as long as the file* is - * used. Only updated through io_uring_register(2). 
- */ - struct fixed_file_data *file_data; - unsigned nr_user_files; - - /* if used, fixed mapped user buffers */ - unsigned nr_user_bufs; - struct io_mapped_ubuf *user_bufs; - - struct user_struct *user; - - const struct cred *creds; - -#ifdef CONFIG_AUDIT - kuid_t loginuid; - unsigned int sessionid; -#endif - - struct completion ref_comp; - struct completion sq_thread_comp; - - /* if all else fails... */ - struct io_kiocb *fallback_req; - -#if defined(CONFIG_UNIX) - struct socket *ring_sock; -#endif - - struct xarray io_buffers; - - struct xarray personalities; - u32 pers_next; + unsigned long check_cq_overflow; struct { unsigned cached_cq_tail; unsigned cq_entries; - unsigned cq_mask; + struct eventfd_ctx *cq_ev_fd; + struct wait_queue_head poll_wait; + struct wait_queue_head cq_wait; + unsigned cq_extra; atomic_t cq_timeouts; unsigned cq_last_tm_flush; - unsigned long cq_check_overflow; - struct wait_queue_head cq_wait; - struct fasync_struct *cq_fasync; - struct eventfd_ctx *cq_ev_fd; - } ____cacheline_aligned_in_smp; - - struct { - struct mutex uring_lock; - wait_queue_head_t wait; } ____cacheline_aligned_in_smp; struct { spinlock_t completion_lock; + spinlock_t timeout_lock; + /* * ->iopoll_list is protected by the ctx->uring_lock for * io_uring instances that don't use IORING_SETUP_SQPOLL. 
@@ -379,17 +420,62 @@ struct io_ring_ctx { struct list_head iopoll_list; struct hlist_head *cancel_hash; unsigned cancel_hash_bits; - bool poll_multi_file; - - spinlock_t inflight_lock; - struct list_head inflight_list; + bool poll_multi_queue; } ____cacheline_aligned_in_smp; - struct delayed_work file_put_work; - struct llist_head file_put_llist; - - struct work_struct exit_work; struct io_restriction restrictions; + + /* slow path rsrc auxilary data, used by update/register */ + struct { + struct io_rsrc_node *rsrc_backup_node; + struct io_mapped_ubuf *dummy_ubuf; + struct io_rsrc_data *file_data; + struct io_rsrc_data *buf_data; + + struct delayed_work rsrc_put_work; + struct llist_head rsrc_put_llist; + struct list_head rsrc_ref_list; + spinlock_t rsrc_ref_lock; + }; + + /* Keep this last, we don't need it for the fast path */ + struct { + #if defined(CONFIG_UNIX) + struct socket *ring_sock; + #endif + /* hashed buffered write serialization */ + struct io_wq_hash *hash_map; + + /* Only used for accounting purposes */ + struct user_struct *user; + struct mm_struct *mm_account; + + /* ctx exit and cancelation */ + struct llist_head fallback_llist; + struct delayed_work fallback_work; + struct work_struct exit_work; + struct list_head tctx_list; + struct completion ref_comp; + u32 iowq_limits[2]; + bool iowq_limits_set; + }; +}; + +struct io_uring_task { + /* submission side */ + int cached_refs; + struct xarray xa; + struct wait_queue_head wait; + const struct io_ring_ctx *last; + struct io_wq *io_wq; + struct percpu_counter inflight; + atomic_t inflight_tracked; + atomic_t in_idle; + + spinlock_t task_lock; + struct io_wq_work_list task_list; + struct callback_head task_work; + bool task_running; }; /* @@ -398,20 +484,24 @@ struct io_ring_ctx { */ struct io_poll_iocb { struct file *file; - union { - struct wait_queue_head *head; - u64 addr; - }; + struct wait_queue_head *head; __poll_t events; - bool done; - bool canceled; struct wait_queue_entry wait; }; 
+struct io_poll_update { + struct file *file; + u64 old_user_data; + u64 new_user_data; + __poll_t events; + bool update_events; + bool update_user_data; +}; + struct io_close { struct file *file; - struct file *put_file; int fd; + u32 file_slot; }; struct io_timeout_data { @@ -419,6 +509,7 @@ struct io_timeout_data { struct hrtimer timer; struct timespec64 ts; enum hrtimer_mode mode; + u32 flags; }; struct io_accept { @@ -426,6 +517,7 @@ struct io_accept { struct sockaddr __user *addr; int __user *addr_len; int flags; + u32 file_slot; unsigned long nofile; }; @@ -447,11 +539,20 @@ struct io_timeout { u32 off; u32 target_seq; struct list_head list; + /* head of the link, used by linked timeouts only */ + struct io_kiocb *head; + /* for linked completions */ + struct io_kiocb *prev; }; struct io_timeout_rem { struct file *file; u64 addr; + + /* timeout update */ + struct timespec64 ts; + u32 flags; + bool ltimeout; }; struct io_rw { @@ -470,8 +571,9 @@ struct io_connect { struct io_sr_msg { struct file *file; union { - struct user_msghdr __user *umsg; - void __user *buf; + struct compat_msghdr __user *umsg_compat; + struct user_msghdr __user *umsg; + void __user *buf; }; int msg_flags; int bgid; @@ -482,13 +584,13 @@ struct io_sr_msg { struct io_open { struct file *file; int dfd; - bool ignore_nonblock; + u32 file_slot; struct filename *filename; struct open_how how; unsigned long nofile; }; -struct io_files_update { +struct io_rsrc_update { struct file *file; u64 arg; u32 nr_args; @@ -519,10 +621,10 @@ struct io_epoll { struct io_splice { struct file *file_out; - struct file *file_in; loff_t off_out; loff_t off_in; u64 len; + int splice_fd_in; unsigned int flags; }; @@ -544,9 +646,52 @@ struct io_statx { struct statx __user *buffer; }; +struct io_shutdown { + struct file *file; + int how; +}; + +struct io_rename { + struct file *file; + int old_dfd; + int new_dfd; + struct filename *oldpath; + struct filename *newpath; + int flags; +}; + +struct io_unlink { + 
struct file *file; + int dfd; + int flags; + struct filename *filename; +}; + +struct io_mkdir { + struct file *file; + int dfd; + umode_t mode; + struct filename *filename; +}; + +struct io_symlink { + struct file *file; + int new_dfd; + struct filename *oldpath; + struct filename *newpath; +}; + +struct io_hardlink { + struct file *file; + int old_dfd; + int new_dfd; + struct filename *oldpath; + struct filename *newpath; + int flags; +}; + struct io_completion { struct file *file; - struct list_head list; u32 cflags; }; @@ -556,7 +701,8 @@ struct io_async_connect { struct io_async_msghdr { struct iovec fast_iov[UIO_FASTIOV]; - struct iovec *iov; + /* points to an allocated iov, if NULL we use fast_iov instead */ + struct iovec *free_iov; struct sockaddr __user *uaddr; struct msghdr msg; struct sockaddr_storage addr; @@ -566,6 +712,7 @@ struct io_async_rw { struct iovec fast_iov[UIO_FASTIOV]; const struct iovec *free_iovec; struct iov_iter iter; + struct iov_iter_state iter_state; size_t bytes_done; struct wait_page_queue wpq; }; @@ -578,19 +725,24 @@ enum { REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, - REQ_F_LINK_HEAD_BIT, - REQ_F_FAIL_LINK_BIT, + /* first byte is taken by user flags, shift it to not overlap */ + REQ_F_FAIL_BIT = 8, REQ_F_INFLIGHT_BIT, REQ_F_CUR_POS_BIT, REQ_F_NOWAIT_BIT, REQ_F_LINK_TIMEOUT_BIT, - REQ_F_ISREG_BIT, REQ_F_NEED_CLEANUP_BIT, REQ_F_POLLED_BIT, REQ_F_BUFFER_SELECTED_BIT, - REQ_F_NO_FILE_TABLE_BIT, - REQ_F_WORK_INITIALIZED_BIT, - REQ_F_LTIMEOUT_ACTIVE_BIT, + REQ_F_COMPLETE_INLINE_BIT, + REQ_F_REISSUE_BIT, + REQ_F_CREDS_BIT, + REQ_F_REFCOUNT_BIT, + REQ_F_ARM_LTIMEOUT_BIT, + /* keep async read/write and isreg together and in order */ + REQ_F_NOWAIT_READ_BIT, + REQ_F_NOWAIT_WRITE_BIT, + REQ_F_ISREG_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -610,11 +762,9 @@ enum { /* IOSQE_BUFFER_SELECT */ REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), 
- /* head of a link */ - REQ_F_LINK_HEAD = BIT(REQ_F_LINK_HEAD_BIT), /* fail rest of links */ - REQ_F_FAIL_LINK = BIT(REQ_F_FAIL_LINK_BIT), - /* on inflight list */ + REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), + /* on inflight list, should be cancelled and waited on exit reliably */ REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), /* read/write uses file position */ REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), @@ -622,20 +772,28 @@ enum { REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), /* has or had linked timeout */ REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), - /* regular file */ - REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), /* needs cleanup */ REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), /* already went through poll handler */ REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), /* buffer already selected */ REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), - /* doesn't need file table for this request */ - REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT), - /* io_wq_work is initialized */ - REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT), - /* linked timeout is active, i.e. 
prepared by link's head */ - REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT), + /* completion is deferred through io_comp_state */ + REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT), + /* caller should reissue async */ + REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), + /* supports async reads */ + REQ_F_NOWAIT_READ = BIT(REQ_F_NOWAIT_READ_BIT), + /* supports async writes */ + REQ_F_NOWAIT_WRITE = BIT(REQ_F_NOWAIT_WRITE_BIT), + /* regular file */ + REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), + /* has creds assigned */ + REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), + /* skip refcounting if not set */ + REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), + /* there is a linked timeout that has to be armed */ + REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), }; struct async_poll { @@ -643,6 +801,21 @@ struct async_poll { struct io_poll_iocb *double_poll; }; +typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked); + +struct io_task_work { + union { + struct io_wq_work_node node; + struct llist_node fallback_node; + }; + io_req_tw_func_t func; +}; + +enum { + IORING_RSRC_FILE = 0, + IORING_RSRC_BUFFER = 1, +}; + /* * NOTE! Each of the iocb union members has the file pointer * as the first entry in their struct definition. 
So you can @@ -654,6 +827,7 @@ struct io_kiocb { struct file *file; struct io_rw rw; struct io_poll_iocb poll; + struct io_poll_update poll_update; struct io_accept accept; struct io_sync sync; struct io_cancel cancel; @@ -663,13 +837,19 @@ struct io_kiocb { struct io_sr_msg sr_msg; struct io_open open; struct io_close close; - struct io_files_update files_update; + struct io_rsrc_update rsrc_update; struct io_fadvise fadvise; struct io_madvise madvise; struct io_epoll epoll; struct io_splice splice; struct io_provide_buf pbuf; struct io_statx statx; + struct io_shutdown shutdown; + struct io_rename rename; + struct io_unlink unlink; + struct io_mkdir mkdir; + struct io_symlink symlink; + struct io_hardlink hardlink; /* use only after cleaning per-op data, see io_clean_op() */ struct io_completion compl; }; @@ -685,26 +865,33 @@ struct io_kiocb { struct io_ring_ctx *ctx; unsigned int flags; - refcount_t refs; + atomic_t refs; struct task_struct *task; u64 user_data; - struct list_head link_list; + struct io_kiocb *link; + struct percpu_ref *fixed_rsrc_refs; - /* - * 1. used with ctx->iopoll_list with reads/writes - * 2. to track reqs with ->files (see io_op_def::file_table) - */ + /* used with ctx->iopoll_list with reads/writes */ struct list_head inflight_entry; - - struct list_head iopoll_entry; - - struct percpu_ref *fixed_file_refs; - struct callback_head task_work; + struct io_task_work io_task_work; /* for polled requests, i.e. 
IORING_OP_POLL_ADD and async armed poll */ struct hlist_node hash_node; struct async_poll *apoll; struct io_wq_work work; + const struct cred *creds; + + /* store used ubuf, so we can prevent reloading */ + struct io_mapped_ubuf *imu; + /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ + struct io_buffer *kbuf; + atomic_t poll_refs; +}; + +struct io_tctx_node { + struct list_head ctx_node; + struct task_struct *task; + struct io_ring_ctx *ctx; }; struct io_defer_entry { @@ -713,42 +900,9 @@ struct io_defer_entry { u32 seq; }; -#define IO_IOPOLL_BATCH 8 - -struct io_comp_state { - unsigned int nr; - struct list_head list; - struct io_ring_ctx *ctx; -}; - -struct io_submit_state { - struct blk_plug plug; - - /* - * io_kiocb alloc cache - */ - void *reqs[IO_IOPOLL_BATCH]; - unsigned int free_reqs; - - /* - * Batch completion logic - */ - struct io_comp_state comp; - - /* - * File reference cache - */ - struct file *file; - unsigned int fd; - unsigned int has_refs; - unsigned int ios_left; -}; - struct io_op_def { /* needs req->file assigned */ unsigned needs_file : 1; - /* don't fail if file grab fails */ - unsigned needs_file_no_error : 1; /* hash wq insertion if file is a regular file */ unsigned hash_reg_file : 1; /* unbound wq insertion if file is a non-regular file */ @@ -760,11 +914,12 @@ struct io_op_def { unsigned pollout : 1; /* op supports buffer selection */ unsigned buffer_select : 1; - /* must always have async data allocated */ - unsigned needs_async_data : 1; + /* do prep async if is going to be punted */ + unsigned needs_async_setup : 1; + /* should block plug */ + unsigned plug : 1; /* size of async data needed, if any */ unsigned short async_size; - unsigned work_flags; }; static const struct io_op_def io_op_defs[] = { @@ -774,41 +929,36 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, - .needs_async_data = 1, + .needs_async_setup = 1, + .plug = 1, .async_size = sizeof(struct 
io_async_rw), - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FILES, }, [IORING_OP_WRITEV] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, - .needs_async_data = 1, + .needs_async_setup = 1, + .plug = 1, .async_size = sizeof(struct io_async_rw), - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FSIZE | IO_WQ_WORK_FILES, }, [IORING_OP_FSYNC] = { .needs_file = 1, - .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_READ_FIXED] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, + .plug = 1, .async_size = sizeof(struct io_async_rw), - .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM | - IO_WQ_WORK_FILES, }, [IORING_OP_WRITE_FIXED] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, + .plug = 1, .async_size = sizeof(struct io_async_rw), - .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE | - IO_WQ_WORK_MM | IO_WQ_WORK_FILES, }, [IORING_OP_POLL_ADD] = { .needs_file = 1, @@ -817,126 +967,91 @@ static const struct io_op_def io_op_defs[] = { [IORING_OP_POLL_REMOVE] = {}, [IORING_OP_SYNC_FILE_RANGE] = { .needs_file = 1, - .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_SENDMSG] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, - .needs_async_data = 1, + .needs_async_setup = 1, .async_size = sizeof(struct io_async_msghdr), - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FS, }, [IORING_OP_RECVMSG] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, - .needs_async_data = 1, + .needs_async_setup = 1, .async_size = sizeof(struct io_async_msghdr), - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FS, }, [IORING_OP_TIMEOUT] = { - .needs_async_data = 1, .async_size = sizeof(struct io_timeout_data), - .work_flags = IO_WQ_WORK_MM, }, - [IORING_OP_TIMEOUT_REMOVE] = {}, + [IORING_OP_TIMEOUT_REMOVE] = { + /* used by timeout updates' prep() */ + }, [IORING_OP_ACCEPT] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, - 
.work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FILES, }, [IORING_OP_ASYNC_CANCEL] = {}, [IORING_OP_LINK_TIMEOUT] = { - .needs_async_data = 1, .async_size = sizeof(struct io_timeout_data), - .work_flags = IO_WQ_WORK_MM, }, [IORING_OP_CONNECT] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, - .needs_async_data = 1, + .needs_async_setup = 1, .async_size = sizeof(struct io_async_connect), - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FS, }, [IORING_OP_FALLOCATE] = { .needs_file = 1, - .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE, - }, - [IORING_OP_OPENAT] = { - .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FS, - }, - [IORING_OP_CLOSE] = { - .needs_file = 1, - .needs_file_no_error = 1, - .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG, - }, - [IORING_OP_FILES_UPDATE] = { - .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_MM, - }, - [IORING_OP_STATX] = { - .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_MM | - IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG, }, + [IORING_OP_OPENAT] = {}, + [IORING_OP_CLOSE] = {}, + [IORING_OP_FILES_UPDATE] = {}, + [IORING_OP_STATX] = {}, [IORING_OP_READ] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, + .plug = 1, .async_size = sizeof(struct io_async_rw), - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FILES, }, [IORING_OP_WRITE] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, + .plug = 1, .async_size = sizeof(struct io_async_rw), - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FSIZE | IO_WQ_WORK_FILES, }, [IORING_OP_FADVISE] = { .needs_file = 1, - .work_flags = IO_WQ_WORK_BLKCG, - }, - [IORING_OP_MADVISE] = { - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, + [IORING_OP_MADVISE] = {}, [IORING_OP_SEND] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FS, }, [IORING_OP_RECV] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, - 
.work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FS, }, [IORING_OP_OPENAT2] = { - .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_FS | - IO_WQ_WORK_BLKCG, }, [IORING_OP_EPOLL_CTL] = { .unbound_nonreg_file = 1, - .work_flags = IO_WQ_WORK_FILES, }, [IORING_OP_SPLICE] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, - .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FILES, }, [IORING_OP_PROVIDE_BUFFERS] = {}, [IORING_OP_REMOVE_BUFFERS] = {}, @@ -945,43 +1060,47 @@ static const struct io_op_def io_op_defs[] = { .hash_reg_file = 1, .unbound_nonreg_file = 1, }, + [IORING_OP_SHUTDOWN] = { + .needs_file = 1, + }, + [IORING_OP_RENAMEAT] = {}, + [IORING_OP_UNLINKAT] = {}, }; -enum io_mem_account { - ACCT_LOCKED, - ACCT_PINNED, -}; +/* requests with any of those set should undergo io_disarm_next() */ +#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL) -static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node); -static struct fixed_file_ref_node *alloc_fixed_file_ref_node( - struct io_ring_ctx *ctx); +static bool io_disarm_next(struct io_kiocb *req); +static void io_uring_del_tctx_node(unsigned long index); +static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, + struct task_struct *task, + bool cancel_all); +static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); + +static void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags); -static void __io_complete_rw(struct io_kiocb *req, long res, long res2, - struct io_comp_state *cs); -static void io_cqring_fill_event(struct io_kiocb *req, long res); static void io_put_req(struct io_kiocb *req); -static void io_put_req_deferred(struct io_kiocb *req, int nr); -static void io_double_put_req(struct io_kiocb *req); -static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req); -static void __io_queue_linked_timeout(struct io_kiocb *req); +static void io_put_req_deferred(struct io_kiocb *req); +static void 
io_dismantle_req(struct io_kiocb *req); static void io_queue_linked_timeout(struct io_kiocb *req); -static int __io_sqe_files_update(struct io_ring_ctx *ctx, - struct io_uring_files_update *ip, - unsigned nr_args); -static void __io_clean_op(struct io_kiocb *req); -static struct file *io_file_get(struct io_submit_state *state, +static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, + struct io_uring_rsrc_update2 *up, + unsigned nr_args); +static void io_clean_op(struct io_kiocb *req); +static struct file *io_file_get(struct io_ring_ctx *ctx, struct io_kiocb *req, int fd, bool fixed); -static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs); -static void io_file_put_work(struct work_struct *work); +static void __io_queue_sqe(struct io_kiocb *req); +static void io_rsrc_put_work(struct work_struct *work); -static ssize_t io_import_iovec(int rw, struct io_kiocb *req, - struct iovec **iovec, struct iov_iter *iter, - bool needs_lock); -static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, - const struct iovec *fast_iov, - struct iov_iter *iter, bool force); -static void io_req_drop_files(struct io_kiocb *req); static void io_req_task_queue(struct io_kiocb *req); +static void io_submit_flush_completions(struct io_ring_ctx *ctx); +static int io_req_prep_async(struct io_kiocb *req); + +static int io_install_fixed_file(struct io_kiocb *req, struct file *file, + unsigned int issue_flags, u32 slot_index); +static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags); + +static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); static struct kmem_cache *req_cachep; @@ -1000,21 +1119,67 @@ struct sock *io_uring_get_socket(struct file *file) } EXPORT_SYMBOL(io_uring_get_socket); -static inline void io_clean_op(struct io_kiocb *req) +static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked) { - if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED)) - __io_clean_op(req); + 
if (!*locked) { + mutex_lock(&ctx->uring_lock); + *locked = true; + } } -static inline bool __io_match_files(struct io_kiocb *req, - struct files_struct *files) +#define io_for_each_link(pos, head) \ + for (pos = (head); pos; pos = pos->link) + +/* + * Shamelessly stolen from the mm implementation of page reference checking, + * see commit f958d7b528b1 for details. + */ +#define req_ref_zero_or_close_to_overflow(req) \ + ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u) + +static inline bool req_ref_inc_not_zero(struct io_kiocb *req) { - if (req->file && req->file->f_op == &io_uring_fops) + WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); + return atomic_inc_not_zero(&req->refs); +} + +static inline bool req_ref_put_and_test(struct io_kiocb *req) +{ + if (likely(!(req->flags & REQ_F_REFCOUNT))) return true; - return ((req->flags & REQ_F_WORK_INITIALIZED) && - (req->work.flags & IO_WQ_WORK_FILES)) && - req->work.identity->files == files; + WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); + return atomic_dec_and_test(&req->refs); +} + +static inline void req_ref_get(struct io_kiocb *req) +{ + WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); + WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); + atomic_inc(&req->refs); +} + +static inline void __io_req_set_refcount(struct io_kiocb *req, int nr) +{ + if (!(req->flags & REQ_F_REFCOUNT)) { + req->flags |= REQ_F_REFCOUNT; + atomic_set(&req->refs, nr); + } +} + +static inline void io_req_set_refcount(struct io_kiocb *req) +{ + __io_req_set_refcount(req, 1); +} + +static inline void io_req_set_rsrc_node(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + + if (!req->fixed_rsrc_refs) { + req->fixed_rsrc_refs = &ctx->rsrc_node->refs; + percpu_ref_get(req->fixed_rsrc_refs); + } } static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl) @@ -1029,157 +1194,71 @@ static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl) percpu_ref_put(ref); } -static bool 
io_match_task(struct io_kiocb *head, - struct task_struct *task, - struct files_struct *files) +static bool io_match_task(struct io_kiocb *head, struct task_struct *task, + bool cancel_all) + __must_hold(&req->ctx->timeout_lock) { - struct io_kiocb *link; + struct io_kiocb *req; - if (task && head->task != task) { - /* in terms of cancelation, always match if req task is dead */ - if (head->task->flags & PF_EXITING) - return true; + if (task && head->task != task) return false; - } - if (!files) + if (cancel_all) return true; - if (__io_match_files(head, files)) - return true; - if (head->flags & REQ_F_LINK_HEAD) { - list_for_each_entry(link, &head->link_list, link_list) { - if (__io_match_files(link, files)) - return true; - } + + io_for_each_link(req, head) { + if (req->flags & REQ_F_INFLIGHT) + return true; } return false; } - -static void io_sq_thread_drop_mm(void) +static bool io_match_linked(struct io_kiocb *head) { - struct mm_struct *mm = current->mm; + struct io_kiocb *req; - if (mm) { - kthread_unuse_mm(mm); - mmput(mm); - current->mm = NULL; + io_for_each_link(req, head) { + if (req->flags & REQ_F_INFLIGHT) + return true; } -} - -static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx) -{ - struct mm_struct *mm; - - if (current->flags & PF_EXITING) - return -EFAULT; - if (current->mm) - return 0; - - /* Should never happen */ - if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL))) - return -EFAULT; - - task_lock(ctx->sqo_task); - mm = ctx->sqo_task->mm; - if (unlikely(!mm || !mmget_not_zero(mm))) - mm = NULL; - task_unlock(ctx->sqo_task); - - if (mm) { - kthread_use_mm(mm); - return 0; - } - - return -EFAULT; -} - -static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx, - struct io_kiocb *req) -{ - if (!(io_op_defs[req->opcode].work_flags & IO_WQ_WORK_MM)) - return 0; - return __io_sq_thread_acquire_mm(ctx); -} - -static void io_sq_thread_associate_blkcg(struct io_ring_ctx *ctx, - struct cgroup_subsys_state **cur_css) - -{ -#ifdef CONFIG_BLK_CGROUP 
- /* puts the old one when swapping */ - if (*cur_css != ctx->sqo_blkcg_css) { - kthread_associate_blkcg(ctx->sqo_blkcg_css); - *cur_css = ctx->sqo_blkcg_css; - } -#endif -} - -static void io_sq_thread_unassociate_blkcg(void) -{ -#ifdef CONFIG_BLK_CGROUP - kthread_associate_blkcg(NULL); -#endif -} - -static inline void req_set_fail_links(struct io_kiocb *req) -{ - if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK) - req->flags |= REQ_F_FAIL_LINK; + return false; } /* - * None of these are dereferenced, they are simply used to check if any of - * them have changed. If we're under current and check they are still the - * same, we're fine to grab references to them for actual out-of-line use. + * As io_match_task() but protected against racing with linked timeouts. + * User must not hold timeout_lock. */ -static void io_init_identity(struct io_identity *id) +static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, + bool cancel_all) { - id->files = current->files; - id->mm = current->mm; -#ifdef CONFIG_BLK_CGROUP - rcu_read_lock(); - id->blkcg_css = blkcg_css(); - rcu_read_unlock(); -#endif - id->creds = current_cred(); - id->nsproxy = current->nsproxy; - id->fs = current->fs; - id->fsize = rlimit(RLIMIT_FSIZE); -#ifdef CONFIG_AUDIT - id->loginuid = current->loginuid; - id->sessionid = current->sessionid; -#endif - refcount_set(&id->count, 1); + bool matched; + + if (task && head->task != task) + return false; + if (cancel_all) + return true; + + if (head->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = head->ctx; + + /* protect against races with linked timeouts */ + spin_lock_irq(&ctx->timeout_lock); + matched = io_match_linked(head); + spin_unlock_irq(&ctx->timeout_lock); + } else { + matched = io_match_linked(head); + } + return matched; } -static inline void __io_req_init_async(struct io_kiocb *req) +static inline void req_set_fail(struct io_kiocb *req) { - memset(&req->work, 0, sizeof(req->work)); - req->flags |= 
REQ_F_WORK_INITIALIZED; + req->flags |= REQ_F_FAIL; } -/* - * Note: must call io_req_init_async() for the first time you - * touch any members of io_wq_work. - */ -static inline void io_req_init_async(struct io_kiocb *req) +static inline void req_fail_link_node(struct io_kiocb *req, int res) { - struct io_uring_task *tctx = req->task->io_uring; - - if (req->flags & REQ_F_WORK_INITIALIZED) - return; - - __io_req_init_async(req); - - /* Grab a ref if this isn't our static identity */ - req->work.identity = tctx->identity; - if (tctx->identity != &tctx->__identity) - refcount_inc(&req->work.identity->count); -} - -static inline bool io_async_submit(struct io_ring_ctx *ctx) -{ - return ctx->flags & IORING_SETUP_SQPOLL; + req_set_fail(req); + req->result = res; } static void io_ring_ctx_ref_free(struct percpu_ref *ref) @@ -1194,6 +1273,27 @@ static inline bool io_is_timeout_noseq(struct io_kiocb *req) return !req->timeout.off; } +static void io_fallback_req_func(struct work_struct *work) +{ + struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, + fallback_work.work); + struct llist_node *node = llist_del_all(&ctx->fallback_llist); + struct io_kiocb *req, *tmp; + bool locked = false; + + percpu_ref_get(&ctx->refs); + llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node) + req->io_task_work.func(req, &locked); + + if (locked) { + if (ctx->submit_state.compl_nr) + io_submit_flush_completions(ctx); + mutex_unlock(&ctx->uring_lock); + } + percpu_ref_put(&ctx->refs); + +} + static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) { struct io_ring_ctx *ctx; @@ -1203,10 +1303,6 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) if (!ctx) return NULL; - ctx->fallback_req = kmem_cache_alloc(req_cachep, GFP_KERNEL); - if (!ctx->fallback_req) - goto err; - /* * Use 5 bits less than the max cq entries, that should give us around * 32 entries per hash list if totally full and uniformly spread. 
@@ -1222,6 +1318,12 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) goto err; __hash_init(ctx->cancel_hash, 1U << hash_bits); + ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL); + if (!ctx->dummy_ubuf) + goto err; + /* set invalid range, so io_import_fixed() fails meeting it */ + ctx->dummy_ubuf->ubuf = -1UL; + if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) goto err; @@ -1229,232 +1331,109 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ctx->flags = p->flags; init_waitqueue_head(&ctx->sqo_sq_wait); INIT_LIST_HEAD(&ctx->sqd_list); - init_waitqueue_head(&ctx->cq_wait); + init_waitqueue_head(&ctx->poll_wait); INIT_LIST_HEAD(&ctx->cq_overflow_list); init_completion(&ctx->ref_comp); - init_completion(&ctx->sq_thread_comp); xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1); xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); mutex_init(&ctx->uring_lock); - init_waitqueue_head(&ctx->wait); + init_waitqueue_head(&ctx->cq_wait); spin_lock_init(&ctx->completion_lock); + spin_lock_init(&ctx->timeout_lock); INIT_LIST_HEAD(&ctx->iopoll_list); INIT_LIST_HEAD(&ctx->defer_list); INIT_LIST_HEAD(&ctx->timeout_list); - spin_lock_init(&ctx->inflight_lock); - INIT_LIST_HEAD(&ctx->inflight_list); - INIT_DELAYED_WORK(&ctx->file_put_work, io_file_put_work); - init_llist_head(&ctx->file_put_llist); + INIT_LIST_HEAD(&ctx->ltimeout_list); + spin_lock_init(&ctx->rsrc_ref_lock); + INIT_LIST_HEAD(&ctx->rsrc_ref_list); + INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work); + init_llist_head(&ctx->rsrc_put_llist); + INIT_LIST_HEAD(&ctx->tctx_list); + INIT_LIST_HEAD(&ctx->submit_state.free_list); + INIT_LIST_HEAD(&ctx->locked_free_list); + INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); return ctx; err: - if (ctx->fallback_req) - kmem_cache_free(req_cachep, ctx->fallback_req); + kfree(ctx->dummy_ubuf); kfree(ctx->cancel_hash); kfree(ctx); return NULL; } +static void 
io_account_cq_overflow(struct io_ring_ctx *ctx) +{ + struct io_rings *r = ctx->rings; + + WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1); + ctx->cq_extra--; +} + static bool req_need_defer(struct io_kiocb *req, u32 seq) { if (unlikely(req->flags & REQ_F_IO_DRAIN)) { struct io_ring_ctx *ctx = req->ctx; - return seq != ctx->cached_cq_tail - + READ_ONCE(ctx->cached_cq_overflow); + return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail; } return false; } -static void __io_commit_cqring(struct io_ring_ctx *ctx) -{ - struct io_rings *rings = ctx->rings; - - /* order cqe stores with ring update */ - smp_store_release(&rings->cq.tail, ctx->cached_cq_tail); -} - -static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req) -{ - if (req->work.identity == &tctx->__identity) - return; - if (refcount_dec_and_test(&req->work.identity->count)) - kfree(req->work.identity); -} - -static void io_req_clean_work(struct io_kiocb *req) -{ - if (!(req->flags & REQ_F_WORK_INITIALIZED)) - return; - - req->flags &= ~REQ_F_WORK_INITIALIZED; - - if (req->work.flags & IO_WQ_WORK_MM) { - mmdrop(req->work.identity->mm); - req->work.flags &= ~IO_WQ_WORK_MM; - } -#ifdef CONFIG_BLK_CGROUP - if (req->work.flags & IO_WQ_WORK_BLKCG) { - css_put(req->work.identity->blkcg_css); - req->work.flags &= ~IO_WQ_WORK_BLKCG; - } +#define FFS_ASYNC_READ 0x1UL +#define FFS_ASYNC_WRITE 0x2UL +#ifdef CONFIG_64BIT +#define FFS_ISREG 0x4UL +#else +#define FFS_ISREG 0x0UL #endif - if (req->work.flags & IO_WQ_WORK_CREDS) { - put_cred(req->work.identity->creds); - req->work.flags &= ~IO_WQ_WORK_CREDS; - } - if (req->work.flags & IO_WQ_WORK_FS) { - struct fs_struct *fs = req->work.identity->fs; +#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG) - spin_lock(&req->work.identity->fs->lock); - if (--fs->users) - fs = NULL; - spin_unlock(&req->work.identity->fs->lock); - if (fs) - free_fs_struct(fs); - req->work.flags &= ~IO_WQ_WORK_FS; - } - if (req->flags & REQ_F_INFLIGHT) - 
io_req_drop_files(req); - - io_put_identity(req->task->io_uring, req); +static inline bool io_req_ffs_set(struct io_kiocb *req) +{ + return IS_ENABLED(CONFIG_64BIT) && (req->flags & REQ_F_FIXED_FILE); } -/* - * Create a private copy of io_identity, since some fields don't match - * the current context. - */ -static bool io_identity_cow(struct io_kiocb *req) +static void io_req_track_inflight(struct io_kiocb *req) { - struct io_uring_task *tctx = req->task->io_uring; - const struct cred *creds = NULL; - struct io_identity *id; - - if (req->work.flags & IO_WQ_WORK_CREDS) - creds = req->work.identity->creds; - - id = kmemdup(req->work.identity, sizeof(*id), GFP_KERNEL); - if (unlikely(!id)) { - req->work.flags |= IO_WQ_WORK_CANCEL; - return false; + if (!(req->flags & REQ_F_INFLIGHT)) { + req->flags |= REQ_F_INFLIGHT; + atomic_inc(&req->task->io_uring->inflight_tracked); } - - /* - * We can safely just re-init the creds we copied Either the field - * matches the current one, or we haven't grabbed it yet. The only - * exception is ->creds, through registered personalities, so handle - * that one separately. 
- */ - io_init_identity(id); - if (creds) - id->creds = creds; - - /* add one for this request */ - refcount_inc(&id->count); - - /* drop tctx and req identity references, if needed */ - if (tctx->identity != &tctx->__identity && - refcount_dec_and_test(&tctx->identity->count)) - kfree(tctx->identity); - if (req->work.identity != &tctx->__identity && - refcount_dec_and_test(&req->work.identity->count)) - kfree(req->work.identity); - - req->work.identity = id; - tctx->identity = id; - return true; } -static bool io_grab_identity(struct io_kiocb *req) +static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) { - const struct io_op_def *def = &io_op_defs[req->opcode]; - struct io_identity *id = req->work.identity; - struct io_ring_ctx *ctx = req->ctx; + if (WARN_ON_ONCE(!req->link)) + return NULL; - if (def->work_flags & IO_WQ_WORK_FSIZE) { - if (id->fsize != rlimit(RLIMIT_FSIZE)) - return false; - req->work.flags |= IO_WQ_WORK_FSIZE; - } -#ifdef CONFIG_BLK_CGROUP - if (!(req->work.flags & IO_WQ_WORK_BLKCG) && - (def->work_flags & IO_WQ_WORK_BLKCG)) { - rcu_read_lock(); - if (id->blkcg_css != blkcg_css()) { - rcu_read_unlock(); - return false; - } - /* - * This should be rare, either the cgroup is dying or the task - * is moving cgroups. Just punt to root for the handful of ios. 
- */ - if (css_tryget_online(id->blkcg_css)) - req->work.flags |= IO_WQ_WORK_BLKCG; - rcu_read_unlock(); - } -#endif - if (!(req->work.flags & IO_WQ_WORK_CREDS)) { - if (id->creds != current_cred()) - return false; - get_cred(id->creds); - req->work.flags |= IO_WQ_WORK_CREDS; - } -#ifdef CONFIG_AUDIT - if (!uid_eq(current->loginuid, id->loginuid) || - current->sessionid != id->sessionid) - return false; -#endif - if (!(req->work.flags & IO_WQ_WORK_FS) && - (def->work_flags & IO_WQ_WORK_FS)) { - if (current->fs != id->fs) - return false; - spin_lock(&id->fs->lock); - if (!id->fs->in_exec) { - id->fs->users++; - req->work.flags |= IO_WQ_WORK_FS; - } else { - req->work.flags |= IO_WQ_WORK_CANCEL; - } - spin_unlock(¤t->fs->lock); - } - if (!(req->work.flags & IO_WQ_WORK_FILES) && - (def->work_flags & IO_WQ_WORK_FILES) && - !(req->flags & REQ_F_NO_FILE_TABLE)) { - if (id->files != current->files || - id->nsproxy != current->nsproxy) - return false; - atomic_inc(&id->files->count); - get_nsproxy(id->nsproxy); + req->flags &= ~REQ_F_ARM_LTIMEOUT; + req->flags |= REQ_F_LINK_TIMEOUT; - if (!(req->flags & REQ_F_INFLIGHT)) { - req->flags |= REQ_F_INFLIGHT; + /* linked timeouts should have two refs once prep'ed */ + io_req_set_refcount(req); + __io_req_set_refcount(req->link, 2); + return req->link; +} - spin_lock_irq(&ctx->inflight_lock); - list_add(&req->inflight_entry, &ctx->inflight_list); - spin_unlock_irq(&ctx->inflight_lock); - } - req->work.flags |= IO_WQ_WORK_FILES; - } - if (!(req->work.flags & IO_WQ_WORK_MM) && - (def->work_flags & IO_WQ_WORK_MM)) { - if (id->mm != current->mm) - return false; - mmgrab(id->mm); - req->work.flags |= IO_WQ_WORK_MM; - } - - return true; +static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) +{ + if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT))) + return NULL; + return __io_prep_linked_timeout(req); } static void io_prep_async_work(struct io_kiocb *req) { const struct io_op_def *def = &io_op_defs[req->opcode]; struct 
io_ring_ctx *ctx = req->ctx; - struct io_identity *id; - io_req_init_async(req); - id = req->work.identity; + if (!(req->flags & REQ_F_CREDS)) { + req->flags |= REQ_F_CREDS; + req->creds = get_current_cred(); + } + req->work.list.next = NULL; + req->work.flags = 0; if (req->flags & REQ_F_FORCE_ASYNC) req->work.flags |= IO_WQ_WORK_CONCURRENT; @@ -1465,92 +1444,77 @@ static void io_prep_async_work(struct io_kiocb *req) if (def->unbound_nonreg_file) req->work.flags |= IO_WQ_WORK_UNBOUND; } - - /* if we fail grabbing identity, we must COW, regrab, and retry */ - if (io_grab_identity(req)) - return; - - if (!io_identity_cow(req)) - return; - - /* can't fail at this point */ - if (!io_grab_identity(req)) - WARN_ON(1); } static void io_prep_async_link(struct io_kiocb *req) { struct io_kiocb *cur; - io_prep_async_work(req); - if (req->flags & REQ_F_LINK_HEAD) - list_for_each_entry(cur, &req->link_list, link_list) + if (req->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->timeout_lock); + io_for_each_link(cur, req) io_prep_async_work(cur); + spin_unlock_irq(&ctx->timeout_lock); + } else { + io_for_each_link(cur, req) + io_prep_async_work(cur); + } } -static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req) +static void io_queue_async_work(struct io_kiocb *req, bool *locked) { struct io_ring_ctx *ctx = req->ctx; struct io_kiocb *link = io_prep_linked_timeout(req); + struct io_uring_task *tctx = req->task->io_uring; - trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, - &req->work, req->flags); - io_wq_enqueue(ctx->io_wq, &req->work); - return link; -} + /* must not take the lock, NULL it as a precaution */ + locked = NULL; -static void io_queue_async_work(struct io_kiocb *req) -{ - struct io_kiocb *link; + BUG_ON(!tctx); + BUG_ON(!tctx->io_wq); /* init ->work of the whole link before punting */ io_prep_async_link(req); - link = __io_queue_async_work(req); + /* + * Not expected to happen, but if we do 
have a bug where this _can_ + * happen, catch it here and ensure the request is marked as + * canceled. That will make io-wq go through the usual work cancel + * procedure rather than attempt to run this request (or create a new + * worker for it). + */ + if (WARN_ON_ONCE(!same_thread_group(req->task, current))) + req->work.flags |= IO_WQ_WORK_CANCEL; + + trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, + &req->work, req->flags); + io_wq_enqueue(tctx->io_wq, &req->work); if (link) io_queue_linked_timeout(link); } static void io_kill_timeout(struct io_kiocb *req, int status) + __must_hold(&req->ctx->completion_lock) + __must_hold(&req->ctx->timeout_lock) { struct io_timeout_data *io = req->async_data; - int ret; - ret = hrtimer_try_to_cancel(&io->timer); - if (ret != -1) { + if (hrtimer_try_to_cancel(&io->timer) != -1) { if (status) - req_set_fail_links(req); + req_set_fail(req); atomic_set(&req->ctx->cq_timeouts, atomic_read(&req->ctx->cq_timeouts) + 1); list_del_init(&req->timeout.list); - io_cqring_fill_event(req, status); - io_put_req_deferred(req, 1); + io_fill_cqe_req(req, status, 0); + io_put_req_deferred(req); } } -/* - * Returns true if we found and killed one or more timeouts - */ -static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, - struct files_struct *files) +static void io_queue_deferred(struct io_ring_ctx *ctx) { - struct io_kiocb *req, *tmp; - int canceled = 0; - - spin_lock_irq(&ctx->completion_lock); - list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { - if (io_match_task(req, tsk, files)) { - io_kill_timeout(req, -ECANCELED); - canceled++; - } - } - spin_unlock_irq(&ctx->completion_lock); - return canceled != 0; -} - -static void __io_queue_deferred(struct io_ring_ctx *ctx) -{ - do { + while (!list_empty(&ctx->defer_list)) { struct io_defer_entry *de = list_first_entry(&ctx->defer_list, struct io_defer_entry, list); @@ -1559,19 +1523,16 @@ static void __io_queue_deferred(struct 
io_ring_ctx *ctx) list_del_init(&de->list); io_req_task_queue(de->req); kfree(de); - } while (!list_empty(&ctx->defer_list)); + } } static void io_flush_timeouts(struct io_ring_ctx *ctx) + __must_hold(&ctx->completion_lock) { + u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); struct io_kiocb *req, *tmp; - u32 seq; - - if (list_empty(&ctx->timeout_list)) - return; - - seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + spin_lock_irq(&ctx->timeout_lock); list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { u32 events_needed, events_got; @@ -1592,441 +1553,564 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx) io_kill_timeout(req, 0); } - ctx->cq_last_tm_flush = seq; + spin_unlock_irq(&ctx->timeout_lock); } -static void io_commit_cqring(struct io_ring_ctx *ctx) +static void __io_commit_cqring_flush(struct io_ring_ctx *ctx) { - io_flush_timeouts(ctx); - __io_commit_cqring(ctx); + if (ctx->off_timeout_used) + io_flush_timeouts(ctx); + if (ctx->drain_active) + io_queue_deferred(ctx); +} - if (unlikely(!list_empty(&ctx->defer_list))) - __io_queue_deferred(ctx); +static inline void io_commit_cqring(struct io_ring_ctx *ctx) +{ + if (unlikely(ctx->off_timeout_used || ctx->drain_active)) + __io_commit_cqring_flush(ctx); + /* order cqe stores with ring update */ + smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail); } static inline bool io_sqring_full(struct io_ring_ctx *ctx) { struct io_rings *r = ctx->rings; - return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == r->sq_ring_entries; + return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == ctx->sq_entries; } -static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) +static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx) +{ + return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); +} + +static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx) { struct io_rings *rings = ctx->rings; - unsigned tail; + unsigned tail, mask = 
ctx->cq_entries - 1; - tail = ctx->cached_cq_tail; /* * writes to the cq entry need to come after reading head; the * control dependency is enough as we're using WRITE_ONCE to * fill the cq entry */ - if (tail - READ_ONCE(rings->cq.head) == rings->cq_ring_entries) + if (__io_cqring_events(ctx) == ctx->cq_entries) return NULL; - ctx->cached_cq_tail++; - return &rings->cqes[tail & ctx->cq_mask]; + tail = ctx->cached_cq_tail++; + return &rings->cqes[tail & mask]; } static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) { - if (!ctx->cq_ev_fd) + if (likely(!ctx->cq_ev_fd)) return false; if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) return false; - if (!ctx->eventfd_async) - return true; - return io_wq_current_is_worker(); + return !ctx->eventfd_async || io_wq_current_is_worker(); } +/* + * This should only get called when at least one event has been posted. + * Some applications rely on the eventfd notification count only changing + * IFF a new CQE has been added to the CQ ring. There's no depedency on + * 1:1 relationship between how many times this function is called (and + * hence the eventfd count) and number of CQEs posted to the CQ ring. + */ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) { - if (wq_has_sleeper(&ctx->cq_wait)) { - wake_up_interruptible(&ctx->cq_wait); - kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); - } - if (waitqueue_active(&ctx->wait)) - wake_up(&ctx->wait); + /* + * wake_up_all() may seem excessive, but io_wake_function() and + * io_should_wake() handle the termination of the loop and only + * wake as many waiters as we need to. 
+ */ + if (wq_has_sleeper(&ctx->cq_wait)) + wake_up_all(&ctx->cq_wait); if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait)) wake_up(&ctx->sq_data->wait); if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); + if (waitqueue_active(&ctx->poll_wait)) + wake_up_interruptible(&ctx->poll_wait); } -static void io_cqring_mark_overflow(struct io_ring_ctx *ctx) +static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) { - if (list_empty(&ctx->cq_overflow_list)) { - clear_bit(0, &ctx->sq_check_overflow); - clear_bit(0, &ctx->cq_check_overflow); - ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; + /* see waitqueue_active() comment */ + smp_mb(); + + if (ctx->flags & IORING_SETUP_SQPOLL) { + if (waitqueue_active(&ctx->cq_wait)) + wake_up_all(&ctx->cq_wait); } + if (io_should_trigger_evfd(ctx)) + eventfd_signal(ctx->cq_ev_fd, 1); + if (waitqueue_active(&ctx->poll_wait)) + wake_up_interruptible(&ctx->poll_wait); } /* Returns true if there are no backlogged entries after the flush */ -static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, - struct task_struct *tsk, - struct files_struct *files) +static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) { - struct io_rings *rings = ctx->rings; - struct io_kiocb *req, *tmp; - struct io_uring_cqe *cqe; - unsigned long flags; - LIST_HEAD(list); + bool all_flushed, posted; - if (!force) { - if ((ctx->cached_cq_tail - READ_ONCE(rings->cq.head) == - rings->cq_ring_entries)) - return false; - } + if (!force && __io_cqring_events(ctx) == ctx->cq_entries) + return false; - spin_lock_irqsave(&ctx->completion_lock, flags); + posted = false; + spin_lock(&ctx->completion_lock); + while (!list_empty(&ctx->cq_overflow_list)) { + struct io_uring_cqe *cqe = io_get_cqe(ctx); + struct io_overflow_cqe *ocqe; - cqe = NULL; - list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) { - if (!io_match_task(req, tsk, files)) - continue; - - cqe = io_get_cqring(ctx); if (!cqe && 
!force) break; + ocqe = list_first_entry(&ctx->cq_overflow_list, + struct io_overflow_cqe, list); + if (cqe) + memcpy(cqe, &ocqe->cqe, sizeof(*cqe)); + else + io_account_cq_overflow(ctx); - list_move(&req->compl.list, &list); - if (cqe) { - WRITE_ONCE(cqe->user_data, req->user_data); - WRITE_ONCE(cqe->res, req->result); - WRITE_ONCE(cqe->flags, req->compl.cflags); - } else { - ctx->cached_cq_overflow++; - WRITE_ONCE(ctx->rings->cq_overflow, - ctx->cached_cq_overflow); - } + posted = true; + list_del(&ocqe->list); + kfree(ocqe); } - io_commit_cqring(ctx); - io_cqring_mark_overflow(ctx); - - spin_unlock_irqrestore(&ctx->completion_lock, flags); - io_cqring_ev_posted(ctx); - - while (!list_empty(&list)) { - req = list_first_entry(&list, struct io_kiocb, compl.list); - list_del(&req->compl.list); - io_put_req(req); + all_flushed = list_empty(&ctx->cq_overflow_list); + if (all_flushed) { + clear_bit(0, &ctx->check_cq_overflow); + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW); } - return cqe != NULL; + if (posted) + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (posted) + io_cqring_ev_posted(ctx); + return all_flushed; } -static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, - struct task_struct *tsk, - struct files_struct *files) +static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx) { - if (test_bit(0, &ctx->cq_check_overflow)) { + bool ret = true; + + if (test_bit(0, &ctx->check_cq_overflow)) { /* iopoll syncs against uring_lock, not completion_lock */ if (ctx->flags & IORING_SETUP_IOPOLL) mutex_lock(&ctx->uring_lock); - __io_cqring_overflow_flush(ctx, force, tsk, files); + ret = __io_cqring_overflow_flush(ctx, false); if (ctx->flags & IORING_SETUP_IOPOLL) mutex_unlock(&ctx->uring_lock); } + + return ret; } -static void __io_cqring_fill_event(struct io_kiocb *req, long res, - unsigned int cflags) +/* must to be called somewhat shortly after putting a request */ +static inline void 
io_put_task(struct task_struct *task, int nr) +{ + struct io_uring_task *tctx = task->io_uring; + + if (likely(task == current)) { + tctx->cached_refs += nr; + } else { + percpu_counter_sub(&tctx->inflight, nr); + if (unlikely(atomic_read(&tctx->in_idle))) + wake_up(&tctx->wait); + put_task_struct_many(task, nr); + } +} + +static void io_task_refs_refill(struct io_uring_task *tctx) +{ + unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR; + + percpu_counter_add(&tctx->inflight, refill); + refcount_add(refill, ¤t->usage); + tctx->cached_refs += refill; +} + +static inline void io_get_task_refs(int nr) +{ + struct io_uring_task *tctx = current->io_uring; + + tctx->cached_refs -= nr; + if (unlikely(tctx->cached_refs < 0)) + io_task_refs_refill(tctx); +} + +static __cold void io_uring_drop_tctx_refs(struct task_struct *task) +{ + struct io_uring_task *tctx = task->io_uring; + unsigned int refs = tctx->cached_refs; + + if (refs) { + tctx->cached_refs = 0; + percpu_counter_sub(&tctx->inflight, refs); + put_task_struct_many(task, refs); + } +} + +static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, + s32 res, u32 cflags) +{ + struct io_overflow_cqe *ocqe; + + ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT); + if (!ocqe) { + /* + * If we're in ring overflow flush mode, or in task cancel mode, + * or cannot allocate an overflow entry, then we need to drop it + * on the floor. 
+ */ + io_account_cq_overflow(ctx); + return false; + } + if (list_empty(&ctx->cq_overflow_list)) { + set_bit(0, &ctx->check_cq_overflow); + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW); + + } + ocqe->cqe.user_data = user_data; + ocqe->cqe.res = res; + ocqe->cqe.flags = cflags; + list_add_tail(&ocqe->list, &ctx->cq_overflow_list); + return true; +} + +static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data, + s32 res, u32 cflags) { - struct io_ring_ctx *ctx = req->ctx; struct io_uring_cqe *cqe; - trace_io_uring_complete(ctx, req->user_data, res); + trace_io_uring_complete(ctx, user_data, res, cflags); /* * If we can't get a cq entry, userspace overflowed the * submission (by quite a lot). Increment the overflow count in * the ring. */ - cqe = io_get_cqring(ctx); + cqe = io_get_cqe(ctx); if (likely(cqe)) { - WRITE_ONCE(cqe->user_data, req->user_data); + WRITE_ONCE(cqe->user_data, user_data); WRITE_ONCE(cqe->res, res); WRITE_ONCE(cqe->flags, cflags); - } else if (ctx->cq_overflow_flushed || - atomic_read(&req->task->io_uring->in_idle)) { - /* - * If we're in ring overflow flush mode, or in task cancel mode, - * then we cannot store the request for later flushing, we need - * to drop it on the floor. 
- */ - ctx->cached_cq_overflow++; - WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow); - } else { - if (list_empty(&ctx->cq_overflow_list)) { - set_bit(0, &ctx->sq_check_overflow); - set_bit(0, &ctx->cq_check_overflow); - ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; - } - io_clean_op(req); - req->result = res; - req->compl.cflags = cflags; - refcount_inc(&req->refs); - list_add_tail(&req->compl.list, &ctx->cq_overflow_list); + return true; } + return io_cqring_event_overflow(ctx, user_data, res, cflags); } -static void io_cqring_fill_event(struct io_kiocb *req, long res) +static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags) { - __io_cqring_fill_event(req, res, 0); + __io_fill_cqe(req->ctx, req->user_data, res, cflags); } -static void io_cqring_add_event(struct io_kiocb *req, long res, long cflags) +static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, + s32 res, u32 cflags) +{ + ctx->cq_extra++; + return __io_fill_cqe(ctx, user_data, res, cflags); +} + +static void io_req_complete_post(struct io_kiocb *req, s32 res, + u32 cflags) { struct io_ring_ctx *ctx = req->ctx; - unsigned long flags; - spin_lock_irqsave(&ctx->completion_lock, flags); - __io_cqring_fill_event(req, res, cflags); - io_commit_cqring(ctx); - spin_unlock_irqrestore(&ctx->completion_lock, flags); - - io_cqring_ev_posted(ctx); -} - -static void io_submit_flush_completions(struct io_comp_state *cs) -{ - struct io_ring_ctx *ctx = cs->ctx; - - spin_lock_irq(&ctx->completion_lock); - while (!list_empty(&cs->list)) { - struct io_kiocb *req; - - req = list_first_entry(&cs->list, struct io_kiocb, compl.list); - list_del(&req->compl.list); - __io_cqring_fill_event(req, req->result, req->compl.cflags); - - /* - * io_free_req() doesn't care about completion_lock unless one - * of these flags is set. 
REQ_F_WORK_INITIALIZED is in the list - * because of a potential deadlock with req->work.fs->lock - */ - if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT - |REQ_F_WORK_INITIALIZED)) { - spin_unlock_irq(&ctx->completion_lock); - io_put_req(req); - spin_lock_irq(&ctx->completion_lock); - } else { - io_put_req(req); + spin_lock(&ctx->completion_lock); + __io_fill_cqe(ctx, req->user_data, res, cflags); + /* + * If we're the last reference to this request, add to our locked + * free_list cache. + */ + if (req_ref_put_and_test(req)) { + if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { + if (req->flags & IO_DISARM_MASK) + io_disarm_next(req); + if (req->link) { + io_req_task_queue(req->link); + req->link = NULL; + } } - } - io_commit_cqring(ctx); - spin_unlock_irq(&ctx->completion_lock); - - io_cqring_ev_posted(ctx); - cs->nr = 0; -} - -static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags, - struct io_comp_state *cs) -{ - if (!cs) { - io_cqring_add_event(req, res, cflags); - io_put_req(req); + io_dismantle_req(req); + io_put_task(req->task, 1); + list_add(&req->inflight_entry, &ctx->locked_free_list); + ctx->locked_free_nr++; } else { + if (!percpu_ref_tryget(&ctx->refs)) + req = NULL; + } + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + + if (req) { + io_cqring_ev_posted(ctx); + percpu_ref_put(&ctx->refs); + } +} + +static inline bool io_req_needs_clean(struct io_kiocb *req) +{ + return req->flags & IO_REQ_CLEAN_FLAGS; +} + +static inline void io_req_complete_state(struct io_kiocb *req, s32 res, + u32 cflags) +{ + if (io_req_needs_clean(req)) io_clean_op(req); - req->result = res; - req->compl.cflags = cflags; - list_add_tail(&req->compl.list, &cs->list); - if (++cs->nr >= 32) - io_submit_flush_completions(cs); - } + req->result = res; + req->compl.cflags = cflags; + req->flags |= REQ_F_COMPLETE_INLINE; } -static void io_req_complete(struct io_kiocb *req, long res) +static inline void __io_req_complete(struct io_kiocb *req, 
unsigned issue_flags, + s32 res, u32 cflags) { - __io_req_complete(req, res, 0, NULL); + if (issue_flags & IO_URING_F_COMPLETE_DEFER) + io_req_complete_state(req, res, cflags); + else + io_req_complete_post(req, res, cflags); } -static inline bool io_is_fallback_req(struct io_kiocb *req) +static inline void io_req_complete(struct io_kiocb *req, s32 res) { - return req == (struct io_kiocb *) - ((unsigned long) req->ctx->fallback_req & ~1UL); + __io_req_complete(req, 0, res, 0); } -static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx) +static void io_req_complete_failed(struct io_kiocb *req, s32 res) { - struct io_kiocb *req; - - req = ctx->fallback_req; - if (!test_and_set_bit_lock(0, (unsigned long *) &ctx->fallback_req)) - return req; - - return NULL; + req_set_fail(req); + io_req_complete_post(req, res, 0); } -static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx, - struct io_submit_state *state) +static void io_req_complete_fail_submit(struct io_kiocb *req) { - if (!state->free_reqs) { - gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; - size_t sz; - int ret; + /* + * We don't submit, fail them all, for that replace hardlinks with + * normal links. Extra REQ_F_LINK is tolerated. + */ + req->flags &= ~REQ_F_HARDLINK; + req->flags |= REQ_F_LINK; + io_req_complete_failed(req, req->result); +} - sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs)); - ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs); +/* + * Don't initialise the fields below on every allocation, but do that in + * advance and keep them valid across allocations. + */ +static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx) +{ + req->ctx = ctx; + req->link = NULL; + req->async_data = NULL; + /* not necessary, but safer to zero */ + req->result = 0; +} - /* - * Bulk alloc is all-or-nothing. If we fail to get a batch, - * retry single alloc to be on the safe side. 
- */ - if (unlikely(ret <= 0)) { - state->reqs[0] = kmem_cache_alloc(req_cachep, gfp); - if (!state->reqs[0]) - goto fallback; - ret = 1; - } - state->free_reqs = ret; +static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx, + struct io_submit_state *state) +{ + spin_lock(&ctx->completion_lock); + list_splice_init(&ctx->locked_free_list, &state->free_list); + ctx->locked_free_nr = 0; + spin_unlock(&ctx->completion_lock); +} + +/* Returns true IFF there are requests in the cache */ +static bool io_flush_cached_reqs(struct io_ring_ctx *ctx) +{ + struct io_submit_state *state = &ctx->submit_state; + int nr; + + /* + * If we have more than a batch's worth of requests in our IRQ side + * locked cache, grab the lock and move them over to our submission + * side cache. + */ + if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH) + io_flush_cached_locked_reqs(ctx, state); + + nr = state->free_reqs; + while (!list_empty(&state->free_list)) { + struct io_kiocb *req = list_first_entry(&state->free_list, + struct io_kiocb, inflight_entry); + + list_del(&req->inflight_entry); + state->reqs[nr++] = req; + if (nr == ARRAY_SIZE(state->reqs)) + break; } + state->free_reqs = nr; + return nr != 0; +} + +/* + * A request might get retired back into the request caches even before opcode + * handlers and io_issue_sqe() are done with it, e.g. inline completion path. + * Because of that, io_alloc_req() should be called only under ->uring_lock + * and with extra caution to not get a request that is still worked on. 
+ */ +static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx) + __must_hold(&ctx->uring_lock) +{ + struct io_submit_state *state = &ctx->submit_state; + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; + int ret, i; + + BUILD_BUG_ON(ARRAY_SIZE(state->reqs) < IO_REQ_ALLOC_BATCH); + + if (likely(state->free_reqs || io_flush_cached_reqs(ctx))) + goto got_req; + + ret = kmem_cache_alloc_bulk(req_cachep, gfp, IO_REQ_ALLOC_BATCH, + state->reqs); + + /* + * Bulk alloc is all-or-nothing. If we fail to get a batch, + * retry single alloc to be on the safe side. + */ + if (unlikely(ret <= 0)) { + state->reqs[0] = kmem_cache_alloc(req_cachep, gfp); + if (!state->reqs[0]) + return NULL; + ret = 1; + } + + for (i = 0; i < ret; i++) + io_preinit_req(state->reqs[i], ctx); + state->free_reqs = ret; +got_req: state->free_reqs--; return state->reqs[state->free_reqs]; -fallback: - return io_get_fallback_req(ctx); } -static inline void io_put_file(struct io_kiocb *req, struct file *file, - bool fixed) +static inline void io_put_file(struct file *file) { - if (fixed) - percpu_ref_put(req->fixed_file_refs); - else + if (file) fput(file); } static void io_dismantle_req(struct io_kiocb *req) { - io_clean_op(req); + unsigned int flags = req->flags; - if (req->async_data) + if (io_req_needs_clean(req)) + io_clean_op(req); + if (!(flags & REQ_F_FIXED_FILE)) + io_put_file(req->file); + if (req->fixed_rsrc_refs) + percpu_ref_put(req->fixed_rsrc_refs); + if (req->async_data) { kfree(req->async_data); - if (req->file) - io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); - - io_req_clean_work(req); + req->async_data = NULL; + } } static void __io_free_req(struct io_kiocb *req) { - struct io_uring_task *tctx = req->task->io_uring; struct io_ring_ctx *ctx = req->ctx; io_dismantle_req(req); + io_put_task(req->task, 1); - percpu_counter_dec(&tctx->inflight); - if (atomic_read(&tctx->in_idle)) - wake_up(&tctx->wait); - put_task_struct(req->task); + spin_lock(&ctx->completion_lock); + 
list_add(&req->inflight_entry, &ctx->locked_free_list); + ctx->locked_free_nr++; + spin_unlock(&ctx->completion_lock); - if (likely(!io_is_fallback_req(req))) - kmem_cache_free(req_cachep, req); - else - clear_bit_unlock(0, (unsigned long *) &ctx->fallback_req); percpu_ref_put(&ctx->refs); } -static void io_kill_linked_timeout(struct io_kiocb *req) +static inline void io_remove_next_linked(struct io_kiocb *req) { - struct io_ring_ctx *ctx = req->ctx; - struct io_kiocb *link; - bool cancelled = false; - unsigned long flags; + struct io_kiocb *nxt = req->link; - spin_lock_irqsave(&ctx->completion_lock, flags); - link = list_first_entry_or_null(&req->link_list, struct io_kiocb, - link_list); - /* - * Can happen if a linked timeout fired and link had been like - * req -> link t-out -> link t-out [-> ...] - */ - if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) { + req->link = nxt->link; + nxt->link = NULL; +} + +static bool io_kill_linked_timeout(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) + __must_hold(&req->ctx->timeout_lock) +{ + struct io_kiocb *link = req->link; + + if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { struct io_timeout_data *io = link->async_data; - int ret; - list_del_init(&link->link_list); - ret = hrtimer_try_to_cancel(&io->timer); - if (ret != -1) { - io_cqring_fill_event(link, -ECANCELED); - io_commit_cqring(ctx); - cancelled = true; + io_remove_next_linked(req); + link->timeout.head = NULL; + if (hrtimer_try_to_cancel(&io->timer) != -1) { + list_del(&link->timeout.list); + io_fill_cqe_req(link, -ECANCELED, 0); + io_put_req_deferred(link); + return true; } } - req->flags &= ~REQ_F_LINK_TIMEOUT; - spin_unlock_irqrestore(&ctx->completion_lock, flags); - - if (cancelled) { - io_cqring_ev_posted(ctx); - io_put_req(link); - } + return false; } -static struct io_kiocb *io_req_link_next(struct io_kiocb *req) -{ - struct io_kiocb *nxt; - - /* - * The list should never be empty when we are called here. 
But could - * potentially happen if the chain is messed up, check to be on the - * safe side. - */ - if (unlikely(list_empty(&req->link_list))) - return NULL; - - nxt = list_first_entry(&req->link_list, struct io_kiocb, link_list); - list_del_init(&req->link_list); - if (!list_empty(&nxt->link_list)) - nxt->flags |= REQ_F_LINK_HEAD; - return nxt; -} - -/* - * Called if REQ_F_LINK_HEAD is set, and we fail the head request - */ static void io_fail_links(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) { - struct io_ring_ctx *ctx = req->ctx; - unsigned long flags; + struct io_kiocb *nxt, *link = req->link; - spin_lock_irqsave(&ctx->completion_lock, flags); - while (!list_empty(&req->link_list)) { - struct io_kiocb *link = list_first_entry(&req->link_list, - struct io_kiocb, link_list); + req->link = NULL; + while (link) { + long res = -ECANCELED; + + if (link->flags & REQ_F_FAIL) + res = link->result; + + nxt = link->link; + link->link = NULL; - list_del_init(&link->link_list); trace_io_uring_fail_link(req, link); - - io_cqring_fill_event(link, -ECANCELED); - - /* - * It's ok to free under spinlock as they're not linked anymore, - * but avoid REQ_F_WORK_INITIALIZED because it may deadlock on - * work.fs->lock. 
- */ - if (link->flags & REQ_F_WORK_INITIALIZED) - io_put_req_deferred(link, 2); - else - io_double_put_req(link); + io_fill_cqe_req(link, res, 0); + io_put_req_deferred(link); + link = nxt; } +} - io_commit_cqring(ctx); - spin_unlock_irqrestore(&ctx->completion_lock, flags); +static bool io_disarm_next(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) +{ + bool posted = false; - io_cqring_ev_posted(ctx); + if (req->flags & REQ_F_ARM_LTIMEOUT) { + struct io_kiocb *link = req->link; + + req->flags &= ~REQ_F_ARM_LTIMEOUT; + if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { + io_remove_next_linked(req); + io_fill_cqe_req(link, -ECANCELED, 0); + io_put_req_deferred(link); + posted = true; + } + } else if (req->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->timeout_lock); + posted = io_kill_linked_timeout(req); + spin_unlock_irq(&ctx->timeout_lock); + } + if (unlikely((req->flags & REQ_F_FAIL) && + !(req->flags & REQ_F_HARDLINK))) { + posted |= (req->link != NULL); + io_fail_links(req); + } + return posted; } static struct io_kiocb *__io_req_find_next(struct io_kiocb *req) { - req->flags &= ~REQ_F_LINK_HEAD; - if (req->flags & REQ_F_LINK_TIMEOUT) - io_kill_linked_timeout(req); + struct io_kiocb *nxt; /* * If LINK is set, we have dependent requests in this chain. If we @@ -2034,28 +2118,112 @@ static struct io_kiocb *__io_req_find_next(struct io_kiocb *req) * dependencies to the next request. In case of failure, fail the rest * of the chain. 
*/ - if (likely(!(req->flags & REQ_F_FAIL_LINK))) - return io_req_link_next(req); - io_fail_links(req); - return NULL; + if (req->flags & IO_DISARM_MASK) { + struct io_ring_ctx *ctx = req->ctx; + bool posted; + + spin_lock(&ctx->completion_lock); + posted = io_disarm_next(req); + if (posted) + io_commit_cqring(req->ctx); + spin_unlock(&ctx->completion_lock); + if (posted) + io_cqring_ev_posted(ctx); + } + nxt = req->link; + req->link = NULL; + return nxt; } -static struct io_kiocb *io_req_find_next(struct io_kiocb *req) +static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) { - if (likely(!(req->flags & REQ_F_LINK_HEAD))) + if (likely(!(req->flags & (REQ_F_LINK|REQ_F_HARDLINK)))) return NULL; return __io_req_find_next(req); } -static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok) +static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked) +{ + if (!ctx) + return; + if (*locked) { + if (ctx->submit_state.compl_nr) + io_submit_flush_completions(ctx); + mutex_unlock(&ctx->uring_lock); + *locked = false; + } + percpu_ref_put(&ctx->refs); +} + +static void tctx_task_work(struct callback_head *cb) +{ + bool locked = false; + struct io_ring_ctx *ctx = NULL; + struct io_uring_task *tctx = container_of(cb, struct io_uring_task, + task_work); + + while (1) { + struct io_wq_work_node *node; + + if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr) + io_submit_flush_completions(ctx); + + spin_lock_irq(&tctx->task_lock); + node = tctx->task_list.first; + INIT_WQ_LIST(&tctx->task_list); + if (!node) + tctx->task_running = false; + spin_unlock_irq(&tctx->task_lock); + if (!node) + break; + + do { + struct io_wq_work_node *next = node->next; + struct io_kiocb *req = container_of(node, struct io_kiocb, + io_task_work.node); + + if (req->ctx != ctx) { + ctx_flush_and_put(ctx, &locked); + ctx = req->ctx; + /* if not contended, grab and improve batching */ + locked = mutex_trylock(&ctx->uring_lock); + 
percpu_ref_get(&ctx->refs); + } + req->io_task_work.func(req, &locked); + node = next; + } while (node); + + cond_resched(); + } + + ctx_flush_and_put(ctx, &locked); + + /* relaxed read is enough as only the task itself sets ->in_idle */ + if (unlikely(atomic_read(&tctx->in_idle))) + io_uring_drop_tctx_refs(current); +} + +static void io_req_task_work_add(struct io_kiocb *req) { struct task_struct *tsk = req->task; - struct io_ring_ctx *ctx = req->ctx; + struct io_uring_task *tctx = tsk->io_uring; enum task_work_notify_mode notify; - int ret; + struct io_wq_work_node *node; + unsigned long flags; + bool running; - if (tsk->flags & PF_EXITING) - return -ESRCH; + WARN_ON_ONCE(!tctx); + + spin_lock_irqsave(&tctx->task_lock, flags); + wq_list_add_tail(&req->io_task_work.node, &tctx->task_list); + running = tctx->task_running; + if (!running) + tctx->task_running = true; + spin_unlock_irqrestore(&tctx->task_lock, flags); + + /* task_work already pending, we're done */ + if (running) + return; /* * SQPOLL kernel thread doesn't need notification, just a wakeup. For @@ -2063,85 +2231,68 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok) * processing task_work. There's no reliable way to tell if TWA_RESUME * will do the job. */ - notify = TWA_NONE; - if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok) - notify = TWA_SIGNAL; - - ret = task_work_add(tsk, &req->task_work, notify); - if (!ret) + notify = (req->ctx->flags & IORING_SETUP_SQPOLL) ? 
TWA_NONE : TWA_SIGNAL; + if (!task_work_add(tsk, &tctx->task_work, notify)) { wake_up_process(tsk); + return; + } - return ret; + spin_lock_irqsave(&tctx->task_lock, flags); + tctx->task_running = false; + node = tctx->task_list.first; + INIT_WQ_LIST(&tctx->task_list); + spin_unlock_irqrestore(&tctx->task_lock, flags); + + while (node) { + req = container_of(node, struct io_kiocb, io_task_work.node); + node = node->next; + if (llist_add(&req->io_task_work.fallback_node, + &req->ctx->fallback_llist)) + schedule_delayed_work(&req->ctx->fallback_work, 1); + } } -static void __io_req_task_cancel(struct io_kiocb *req, int error) +static void io_req_task_cancel(struct io_kiocb *req, bool *locked) { struct io_ring_ctx *ctx = req->ctx; - spin_lock_irq(&ctx->completion_lock); - io_cqring_fill_event(req, error); - io_commit_cqring(ctx); - spin_unlock_irq(&ctx->completion_lock); - - io_cqring_ev_posted(ctx); - req_set_fail_links(req); - io_double_put_req(req); + /* not needed for normal modes, but SQPOLL depends on it */ + io_tw_lock(ctx, locked); + io_req_complete_failed(req, req->result); } -static void io_req_task_cancel(struct callback_head *cb) -{ - struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); - struct io_ring_ctx *ctx = req->ctx; - - mutex_lock(&ctx->uring_lock); - __io_req_task_cancel(req, -ECANCELED); - mutex_unlock(&ctx->uring_lock); - percpu_ref_put(&ctx->refs); -} - -static void __io_req_task_submit(struct io_kiocb *req) +static void io_req_task_submit(struct io_kiocb *req, bool *locked) { struct io_ring_ctx *ctx = req->ctx; - mutex_lock(&ctx->uring_lock); - if (!ctx->sqo_dead && !__io_sq_thread_acquire_mm(ctx)) - __io_queue_sqe(req, NULL); + io_tw_lock(ctx, locked); + /* req->task == current here, checking PF_EXITING is safe */ + if (likely(!(req->task->flags & PF_EXITING))) + __io_queue_sqe(req); else - __io_req_task_cancel(req, -EFAULT); - mutex_unlock(&ctx->uring_lock); - - if (ctx->flags & IORING_SETUP_SQPOLL) - io_sq_thread_drop_mm(); 
+ io_req_complete_failed(req, -EFAULT); } -static void io_req_task_submit(struct callback_head *cb) +static void io_req_task_queue_fail(struct io_kiocb *req, int ret) { - struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); - struct io_ring_ctx *ctx = req->ctx; - - __io_req_task_submit(req); - percpu_ref_put(&ctx->refs); + req->result = ret; + req->io_task_work.func = io_req_task_cancel; + io_req_task_work_add(req); } static void io_req_task_queue(struct io_kiocb *req) { - int ret; - - init_task_work(&req->task_work, io_req_task_submit); - percpu_ref_get(&req->ctx->refs); - - ret = io_req_task_work_add(req, true); - if (unlikely(ret)) { - struct task_struct *tsk; - - init_task_work(&req->task_work, io_req_task_cancel); - tsk = io_wq_get_task(req->ctx->io_wq); - task_work_add(tsk, &req->task_work, TWA_NONE); - wake_up_process(tsk); - } + req->io_task_work.func = io_req_task_submit; + io_req_task_work_add(req); } -static void io_queue_next(struct io_kiocb *req) +static void io_req_task_queue_reissue(struct io_kiocb *req) +{ + req->io_task_work.func = io_queue_async_work; + io_req_task_work_add(req); +} + +static inline void io_queue_next(struct io_kiocb *req) { struct io_kiocb *nxt = io_req_find_next(req); @@ -2155,153 +2306,118 @@ static void io_free_req(struct io_kiocb *req) __io_free_req(req); } -struct req_batch { - void *reqs[IO_IOPOLL_BATCH]; - int to_free; +static void io_free_req_work(struct io_kiocb *req, bool *locked) +{ + io_free_req(req); +} +struct req_batch { struct task_struct *task; int task_refs; + int ctx_refs; }; static inline void io_init_req_batch(struct req_batch *rb) { - rb->to_free = 0; rb->task_refs = 0; + rb->ctx_refs = 0; rb->task = NULL; } -static void __io_req_free_batch_flush(struct io_ring_ctx *ctx, - struct req_batch *rb) -{ - kmem_cache_free_bulk(req_cachep, rb->to_free, rb->reqs); - percpu_ref_put_many(&ctx->refs, rb->to_free); - rb->to_free = 0; -} - static void io_req_free_batch_finish(struct io_ring_ctx *ctx, 
struct req_batch *rb) { - if (rb->to_free) - __io_req_free_batch_flush(ctx, rb); - if (rb->task) { - struct io_uring_task *tctx = rb->task->io_uring; - - percpu_counter_sub(&tctx->inflight, rb->task_refs); - if (atomic_read(&tctx->in_idle)) - wake_up(&tctx->wait); - put_task_struct_many(rb->task, rb->task_refs); - rb->task = NULL; - } + if (rb->ctx_refs) + percpu_ref_put_many(&ctx->refs, rb->ctx_refs); + if (rb->task) + io_put_task(rb->task, rb->task_refs); } -static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req) +static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req, + struct io_submit_state *state) { - if (unlikely(io_is_fallback_req(req))) { - io_free_req(req); - return; - } - if (req->flags & REQ_F_LINK_HEAD) - io_queue_next(req); + io_queue_next(req); + io_dismantle_req(req); if (req->task != rb->task) { - if (rb->task) { - struct io_uring_task *tctx = rb->task->io_uring; - - percpu_counter_sub(&tctx->inflight, rb->task_refs); - if (atomic_read(&tctx->in_idle)) - wake_up(&tctx->wait); - put_task_struct_many(rb->task, rb->task_refs); - } + if (rb->task) + io_put_task(rb->task, rb->task_refs); rb->task = req->task; rb->task_refs = 0; } rb->task_refs++; + rb->ctx_refs++; - io_dismantle_req(req); - rb->reqs[rb->to_free++] = req; - if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs))) - __io_req_free_batch_flush(req->ctx, rb); + if (state->free_reqs != ARRAY_SIZE(state->reqs)) + state->reqs[state->free_reqs++] = req; + else + list_add(&req->inflight_entry, &state->free_list); +} + +static void io_submit_flush_completions(struct io_ring_ctx *ctx) + __must_hold(&ctx->uring_lock) +{ + struct io_submit_state *state = &ctx->submit_state; + int i, nr = state->compl_nr; + struct req_batch rb; + + spin_lock(&ctx->completion_lock); + for (i = 0; i < nr; i++) { + struct io_kiocb *req = state->compl_reqs[i]; + + __io_fill_cqe(ctx, req->user_data, req->result, + req->compl.cflags); + } + io_commit_cqring(ctx); + 
spin_unlock(&ctx->completion_lock); + io_cqring_ev_posted(ctx); + + io_init_req_batch(&rb); + for (i = 0; i < nr; i++) { + struct io_kiocb *req = state->compl_reqs[i]; + + if (req_ref_put_and_test(req)) + io_req_free_batch(&rb, req, &ctx->submit_state); + } + + io_req_free_batch_finish(ctx, &rb); + state->compl_nr = 0; } /* * Drop reference to request, return next in chain (if there is one) if this * was the last reference to this request. */ -static struct io_kiocb *io_put_req_find_next(struct io_kiocb *req) +static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req) { struct io_kiocb *nxt = NULL; - if (refcount_dec_and_test(&req->refs)) { + if (req_ref_put_and_test(req)) { nxt = io_req_find_next(req); __io_free_req(req); } return nxt; } -static void io_put_req(struct io_kiocb *req) +static inline void io_put_req(struct io_kiocb *req) { - if (refcount_dec_and_test(&req->refs)) + if (req_ref_put_and_test(req)) io_free_req(req); } -static void io_put_req_deferred_cb(struct callback_head *cb) +static inline void io_put_req_deferred(struct io_kiocb *req) { - struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); - - io_free_req(req); -} - -static void io_free_req_deferred(struct io_kiocb *req) -{ - int ret; - - init_task_work(&req->task_work, io_put_req_deferred_cb); - ret = io_req_task_work_add(req, true); - if (unlikely(ret)) { - struct task_struct *tsk; - - tsk = io_wq_get_task(req->ctx->io_wq); - task_work_add(tsk, &req->task_work, TWA_NONE); - wake_up_process(tsk); + if (req_ref_put_and_test(req)) { + req->io_task_work.func = io_free_req_work; + io_req_task_work_add(req); } } -static inline void io_put_req_deferred(struct io_kiocb *req, int refs) -{ - if (refcount_sub_and_test(refs, &req->refs)) - io_free_req_deferred(req); -} - -static struct io_wq_work *io_steal_work(struct io_kiocb *req) -{ - struct io_kiocb *nxt; - - /* - * A ref is owned by io-wq in which context we're. 
So, if that's the - * last one, it's safe to steal next work. False negatives are Ok, - * it just will be re-punted async in io_put_work() - */ - if (refcount_read(&req->refs) != 1) - return NULL; - - nxt = io_req_find_next(req); - return nxt ? &nxt->work : NULL; -} - -static void io_double_put_req(struct io_kiocb *req) -{ - /* drop both submit and complete references */ - if (refcount_sub_and_test(2, &req->refs)) - io_free_req(req); -} - static unsigned io_cqring_events(struct io_ring_ctx *ctx) { - struct io_rings *rings = ctx->rings; - /* See comment at the top of this file */ smp_rmb(); - return ctx->cached_cq_tail - READ_ONCE(rings->cq.head); + return __io_cqring_events(ctx); } static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) @@ -2327,38 +2443,23 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req) { struct io_buffer *kbuf; + if (likely(!(req->flags & REQ_F_BUFFER_SELECTED))) + return 0; kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; return io_put_kbuf(req, kbuf); } static inline bool io_run_task_work(void) { - /* - * Not safe to run on exiting task, and the task_work handling will - * not add work to such a task. 
- */ - if (unlikely(current->flags & PF_EXITING)) - return false; - if (current->task_works) { + if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) { __set_current_state(TASK_RUNNING); - task_work_run(); + tracehook_notify_signal(); return true; } return false; } -static void io_iopoll_queue(struct list_head *again) -{ - struct io_kiocb *req; - - do { - req = list_first_entry(again, struct io_kiocb, iopoll_entry); - list_del(&req->iopoll_entry); - __io_complete_rw(req, -EAGAIN, 0, NULL); - } while (!list_empty(again)); -} - /* * Find and free completed poll iocbs */ @@ -2367,41 +2468,25 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, { struct req_batch rb; struct io_kiocb *req; - LIST_HEAD(again); /* order with ->result store in io_complete_rw_iopoll() */ smp_rmb(); io_init_req_batch(&rb); while (!list_empty(done)) { - int cflags = 0; + req = list_first_entry(done, struct io_kiocb, inflight_entry); + list_del(&req->inflight_entry); - req = list_first_entry(done, struct io_kiocb, iopoll_entry); - if (READ_ONCE(req->result) == -EAGAIN) { - req->result = 0; - req->iopoll_completed = 0; - list_move_tail(&req->iopoll_entry, &again); - continue; - } - list_del(&req->iopoll_entry); - - if (req->flags & REQ_F_BUFFER_SELECTED) - cflags = io_put_rw_kbuf(req); - - __io_cqring_fill_event(req, req->result, cflags); + io_fill_cqe_req(req, req->result, io_put_rw_kbuf(req)); (*nr_events)++; - if (refcount_dec_and_test(&req->refs)) - io_req_free_batch(&rb, req); + if (req_ref_put_and_test(req)) + io_req_free_batch(&rb, req, &ctx->submit_state); } io_commit_cqring(ctx); - if (ctx->flags & IORING_SETUP_SQPOLL) - io_cqring_ev_posted(ctx); + io_cqring_ev_posted_iopoll(ctx); io_req_free_batch_finish(ctx, &rb); - - if (!list_empty(&again)) - io_iopoll_queue(&again); } static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, @@ -2410,17 +2495,16 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, 
struct io_kiocb *req, *tmp; LIST_HEAD(done); bool spin; - int ret; /* * Only spin for completions if we don't have multiple devices hanging * off our complete list, and we're under the requested amount. */ - spin = !ctx->poll_multi_file && *nr_events < min; + spin = !ctx->poll_multi_queue && *nr_events < min; - ret = 0; - list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, iopoll_entry) { + list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) { struct kiocb *kiocb = &req->rw.kiocb; + int ret; /* * Move completed and retryable entries to our local lists. @@ -2428,50 +2512,27 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, * and complete those lists first, if we have entries there. */ if (READ_ONCE(req->iopoll_completed)) { - list_move_tail(&req->iopoll_entry, &done); + list_move_tail(&req->inflight_entry, &done); continue; } if (!list_empty(&done)) break; ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); - if (ret < 0) - break; + if (unlikely(ret < 0)) + return ret; + else if (ret) + spin = false; /* iopoll may have completed current req */ if (READ_ONCE(req->iopoll_completed)) - list_move_tail(&req->iopoll_entry, &done); - - if (ret && spin) - spin = false; - ret = 0; + list_move_tail(&req->inflight_entry, &done); } if (!list_empty(&done)) io_iopoll_complete(ctx, nr_events, &done); - return ret; -} - -/* - * Poll for a minimum of 'min' events. Note that if min == 0 we consider that a - * non-spinning poll check - we'll still enter the driver poll loop, but only - * as a non-spinning completion check. 
- */ -static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events, - long min) -{ - while (!list_empty(&ctx->iopoll_list) && !need_resched()) { - int ret; - - ret = io_do_iopoll(ctx, nr_events, min); - if (ret < 0) - return ret; - if (*nr_events >= min) - return 0; - } - - return 1; + return 0; } /* @@ -2509,7 +2570,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) static int io_iopoll_check(struct io_ring_ctx *ctx, long min) { unsigned int nr_events = 0; - int iters = 0, ret = 0; + int ret = 0; /* * We disallow the app entering submit/complete with polling, but we @@ -2517,17 +2578,16 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) * that got punted to a workqueue. */ mutex_lock(&ctx->uring_lock); + /* + * Don't enter poll loop if we already have events pending. + * If we do, we can potentially be spinning for commands that + * already triggered a CQE (eg in error). + */ + if (test_bit(0, &ctx->check_cq_overflow)) + __io_cqring_overflow_flush(ctx, false); + if (io_cqring_events(ctx)) + goto out; do { - /* - * Don't enter poll loop if we already have events pending. - * If we do, we can potentially be spinning for commands that - * already triggered a CQE (eg in error). - */ - if (test_bit(0, &ctx->cq_check_overflow)) - __io_cqring_overflow_flush(ctx, false, NULL, NULL); - if (io_cqring_events(ctx)) - break; - /* * If a submit got punted to a workqueue, we can have the * application entering polling for a command before it gets @@ -2538,18 +2598,21 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) * forever, while the workqueue is stuck trying to acquire the * very same mutex. 
*/ - if (!(++iters & 7)) { + if (list_empty(&ctx->iopoll_list)) { + u32 tail = ctx->cached_cq_tail; + mutex_unlock(&ctx->uring_lock); io_run_task_work(); mutex_lock(&ctx->uring_lock); + + /* some requests don't go through iopoll_list */ + if (tail != ctx->cached_cq_tail || + list_empty(&ctx->iopoll_list)) + break; } - - ret = io_iopoll_getevents(ctx, &nr_events, min); - if (ret <= 0) - break; - ret = 0; - } while (min && !nr_events && !need_resched()); - + ret = io_do_iopoll(ctx, &nr_events, min); + } while (!ret && nr_events < min && !need_resched()); +out: mutex_unlock(&ctx->uring_lock); return ret; } @@ -2561,79 +2624,129 @@ static void kiocb_end_write(struct io_kiocb *req) * thread. */ if (req->flags & REQ_F_ISREG) { - struct inode *inode = file_inode(req->file); + struct super_block *sb = file_inode(req->file)->i_sb; - __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); + __sb_writers_acquired(sb, SB_FREEZE_WRITE); + sb_end_write(sb); } - file_end_write(req->file); -} - -static void io_complete_rw_common(struct kiocb *kiocb, long res, - struct io_comp_state *cs) -{ - struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); - int cflags = 0; - - if (kiocb->ki_flags & IOCB_WRITE) - kiocb_end_write(req); - - if (res != req->result) - req_set_fail_links(req); - if (req->flags & REQ_F_BUFFER_SELECTED) - cflags = io_put_rw_kbuf(req); - __io_req_complete(req, res, cflags, cs); } #ifdef CONFIG_BLOCK -static bool io_resubmit_prep(struct io_kiocb *req, int error) +static bool io_resubmit_prep(struct io_kiocb *req) { - req_set_fail_links(req); - return false; -} -#endif + struct io_async_rw *rw = req->async_data; -static bool io_rw_reissue(struct io_kiocb *req, long res) + if (!rw) + return !io_req_prep_async(req); + iov_iter_restore(&rw->iter, &rw->iter_state); + return true; +} + +static bool io_rw_should_reissue(struct io_kiocb *req) { -#ifdef CONFIG_BLOCK umode_t mode = file_inode(req->file)->i_mode; - int ret; + struct io_ring_ctx *ctx = req->ctx; if 
(!S_ISBLK(mode) && !S_ISREG(mode)) return false; - if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker()) + if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() && + !(ctx->flags & IORING_SETUP_IOPOLL))) return false; /* * If ref is dying, we might be running poll reap from the exit work. * Don't attempt to reissue from that path, just let it fail with * -EAGAIN. */ - if (percpu_ref_is_dying(&req->ctx->refs)) + if (percpu_ref_is_dying(&ctx->refs)) return false; - - ret = io_sq_thread_acquire_mm(req->ctx, req); - - if (io_resubmit_prep(req, ret)) { - refcount_inc(&req->refs); - io_queue_async_work(req); - return true; - } - + /* + * Play it safe and assume not safe to re-import and reissue if we're + * not in the original thread group (or in task context). + */ + if (!same_thread_group(req->task, current) || !in_task()) + return false; + return true; +} +#else +static bool io_resubmit_prep(struct io_kiocb *req) +{ + return false; +} +static bool io_rw_should_reissue(struct io_kiocb *req) +{ + return false; +} #endif + +static bool __io_complete_rw_common(struct io_kiocb *req, long res) +{ + if (req->rw.kiocb.ki_flags & IOCB_WRITE) { + kiocb_end_write(req); + fsnotify_modify(req->file); + } else { + fsnotify_access(req->file); + } + if (res != req->result) { + if ((res == -EAGAIN || res == -EOPNOTSUPP) && + io_rw_should_reissue(req)) { + req->flags |= REQ_F_REISSUE; + return true; + } + req_set_fail(req); + req->result = res; + } return false; } -static void __io_complete_rw(struct io_kiocb *req, long res, long res2, - struct io_comp_state *cs) +static inline int io_fixup_rw_res(struct io_kiocb *req, unsigned res) { - if (!io_rw_reissue(req, res)) - io_complete_rw_common(&req->rw.kiocb, res, cs); + struct io_async_rw *io = req->async_data; + + /* add previously done IO, if any */ + if (io && io->bytes_done > 0) { + if (res < 0) + res = io->bytes_done; + else + res += io->bytes_done; + } + return res; +} + +static void 
io_req_task_complete(struct io_kiocb *req, bool *locked) +{ + unsigned int cflags = io_put_rw_kbuf(req); + int res = req->result; + + if (*locked) { + struct io_ring_ctx *ctx = req->ctx; + struct io_submit_state *state = &ctx->submit_state; + + io_req_complete_state(req, res, cflags); + state->compl_reqs[state->compl_nr++] = req; + if (state->compl_nr == ARRAY_SIZE(state->compl_reqs)) + io_submit_flush_completions(ctx); + } else { + io_req_complete_post(req, res, cflags); + } +} + +static void __io_complete_rw(struct io_kiocb *req, long res, long res2, + unsigned int issue_flags) +{ + if (__io_complete_rw_common(req, res)) + return; + __io_req_complete(req, issue_flags, io_fixup_rw_res(req, res), io_put_rw_kbuf(req)); } static void io_complete_rw(struct kiocb *kiocb, long res, long res2) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); - __io_complete_rw(req, res, res2, NULL); + if (__io_complete_rw_common(req, res)) + return; + req->result = io_fixup_rw_res(req, res); + req->io_task_work.func = io_req_task_complete; + io_req_task_work_add(req); } static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) @@ -2642,12 +2755,15 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) if (kiocb->ki_flags & IOCB_WRITE) kiocb_end_write(req); - - if (res != -EAGAIN && res != req->result) - req_set_fail_links(req); + if (unlikely(res != req->result)) { + if (res == -EAGAIN && io_rw_should_reissue(req)) { + req->flags |= REQ_F_REISSUE; + return; + } + } WRITE_ONCE(req->result, res); - /* order with io_poll_complete() checking ->result */ + /* order with io_iopoll_complete() checking ->result */ smp_wmb(); WRITE_ONCE(req->iopoll_completed, 1); } @@ -2655,12 +2771,17 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) /* * After the iocb has been issued, it's safe to be found on the poll list. 
* Adding the kiocb to the list AFTER submission ensures that we don't - * find it from a io_iopoll_getevents() thread before the issuer is done + * find it from a io_do_iopoll() thread before the issuer is done * accessing the kiocb cookie. */ static void io_iopoll_req_issued(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; + const bool in_async = io_wq_current_is_worker(); + + /* workqueue context doesn't hold uring_lock, grab it now */ + if (unlikely(in_async)) + mutex_lock(&ctx->uring_lock); /* * Track whether we have multiple files in our lists. This will impact @@ -2668,14 +2789,22 @@ static void io_iopoll_req_issued(struct io_kiocb *req) * different devices. */ if (list_empty(&ctx->iopoll_list)) { - ctx->poll_multi_file = false; - } else if (!ctx->poll_multi_file) { + ctx->poll_multi_queue = false; + } else if (!ctx->poll_multi_queue) { struct io_kiocb *list_req; + unsigned int queue_num0, queue_num1; list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, - iopoll_entry); - if (list_req->file != req->file) - ctx->poll_multi_file = true; + inflight_entry); + + if (list_req->file != req->file) { + ctx->poll_multi_queue = true; + } else { + queue_num0 = blk_qc_t_to_queue_num(list_req->rw.kiocb.ki_cookie); + queue_num1 = blk_qc_t_to_queue_num(req->rw.kiocb.ki_cookie); + if (queue_num0 != queue_num1) + ctx->poll_multi_queue = true; + } } /* @@ -2683,61 +2812,28 @@ static void io_iopoll_req_issued(struct io_kiocb *req) * it to the front so we find it first. 
*/ if (READ_ONCE(req->iopoll_completed)) - list_add(&req->iopoll_entry, &ctx->iopoll_list); + list_add(&req->inflight_entry, &ctx->iopoll_list); else - list_add_tail(&req->iopoll_entry, &ctx->iopoll_list); + list_add_tail(&req->inflight_entry, &ctx->iopoll_list); - if ((ctx->flags & IORING_SETUP_SQPOLL) && - wq_has_sleeper(&ctx->sq_data->wait)) - wake_up(&ctx->sq_data->wait); -} + if (unlikely(in_async)) { + /* + * If IORING_SETUP_SQPOLL is enabled, sqes are either handle + * in sq thread task context or in io worker task context. If + * current task context is sq thread, we don't need to check + * whether should wake up sq thread. + */ + if ((ctx->flags & IORING_SETUP_SQPOLL) && + wq_has_sleeper(&ctx->sq_data->wait)) + wake_up(&ctx->sq_data->wait); -static void __io_state_file_put(struct io_submit_state *state) -{ - if (state->has_refs) - fput_many(state->file, state->has_refs); - state->file = NULL; -} - -static inline void io_state_file_put(struct io_submit_state *state) -{ - if (state->file) - __io_state_file_put(state); -} - -/* - * Get as many references to a file as we have IOs left in this submission, - * assuming most submissions are for one file, or at least that each file - * has more than one submission. - */ -static struct file *__io_file_get(struct io_submit_state *state, int fd) -{ - if (!state) - return fget(fd); - - if (state->file) { - if (state->fd == fd) { - state->has_refs--; - return state->file; - } - __io_state_file_put(state); + mutex_unlock(&ctx->uring_lock); } - state->file = fget_many(fd, state->ios_left); - if (!state->file) - return NULL; - - state->fd = fd; - state->has_refs = state->ios_left - 1; - return state->file; } static bool io_bdev_nowait(struct block_device *bdev) { -#ifdef CONFIG_BLOCK return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); -#else - return true; -#endif } /* @@ -2745,19 +2841,21 @@ static bool io_bdev_nowait(struct block_device *bdev) * any file. 
For now, just ensure that anything potentially problematic is done * inline. */ -static bool io_file_supports_async(struct file *file, int rw) +static bool __io_file_supports_nowait(struct file *file, int rw) { umode_t mode = file_inode(file)->i_mode; if (S_ISBLK(mode)) { - if (io_bdev_nowait(file->f_inode->i_bdev)) + if (IS_ENABLED(CONFIG_BLOCK) && + io_bdev_nowait(I_BDEV(file->f_mapping->host))) return true; return false; } if (S_ISSOCK(mode)) return true; if (S_ISREG(mode)) { - if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) && + if (IS_ENABLED(CONFIG_BLOCK) && + io_bdev_nowait(file->f_inode->i_sb->s_bdev) && file->f_op != &io_uring_fops) return true; return false; @@ -2776,20 +2874,36 @@ static bool io_file_supports_async(struct file *file, int rw) return file->f_op->write_iter != NULL; } -static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) +static bool io_file_supports_nowait(struct io_kiocb *req, int rw) +{ + if (rw == READ && (req->flags & REQ_F_NOWAIT_READ)) + return true; + else if (rw == WRITE && (req->flags & REQ_F_NOWAIT_WRITE)) + return true; + + return __io_file_supports_nowait(req->file, rw); +} + +static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, + int rw) { struct io_ring_ctx *ctx = req->ctx; struct kiocb *kiocb = &req->rw.kiocb; + struct file *file = req->file; unsigned ioprio; int ret; - if (S_ISREG(file_inode(req->file)->i_mode)) + if (!io_req_ffs_set(req) && S_ISREG(file_inode(file)->i_mode)) req->flags |= REQ_F_ISREG; kiocb->ki_pos = READ_ONCE(sqe->off); - if (kiocb->ki_pos == -1 && !(req->file->f_mode & FMODE_STREAM)) { - req->flags |= REQ_F_CUR_POS; - kiocb->ki_pos = req->file->f_pos; + if (kiocb->ki_pos == -1) { + if (!(file->f_mode & FMODE_STREAM)) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = file->f_pos; + } else { + kiocb->ki_pos = 0; + } } kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); kiocb->ki_flags = iocb_flags(kiocb->ki_filp); @@ -2797,6 +2911,15 @@ static 
int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(ret)) return ret; + /* + * If the file is marked O_NONBLOCK, still allow retry for it if it + * supports async. Otherwise it's impossible to use O_NONBLOCK files + * reliably. If not, or it IOCB_NOWAIT is set, don't retry. + */ + if ((kiocb->ki_flags & IOCB_NOWAIT) || + ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw))) + req->flags |= REQ_F_NOWAIT; + ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { ret = ioprio_check_cap(ioprio); @@ -2807,10 +2930,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) } else kiocb->ki_ioprio = get_current_ioprio(); - /* don't allow async punt if RWF_NOWAIT was requested */ - if (kiocb->ki_flags & IOCB_NOWAIT) - req->flags |= REQ_F_NOWAIT; - if (ctx->flags & IORING_SETUP_IOPOLL) { if (!(kiocb->ki_flags & IOCB_DIRECT) || !kiocb->ki_filp->f_op->iopoll) @@ -2825,9 +2944,24 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) kiocb->ki_complete = io_complete_rw; } + /* used for fixed read/write too - just read unconditionally */ + req->buf_index = READ_ONCE(sqe->buf_index); + req->imu = NULL; + + if (req->opcode == IORING_OP_READ_FIXED || + req->opcode == IORING_OP_WRITE_FIXED) { + struct io_ring_ctx *ctx = req->ctx; + u16 index; + + if (unlikely(req->buf_index >= ctx->nr_user_bufs)) + return -EFAULT; + index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); + req->imu = ctx->user_bufs[index]; + io_req_set_rsrc_node(req); + } + req->rw.addr = READ_ONCE(sqe->addr); req->rw.len = READ_ONCE(sqe->len); - req->buf_index = READ_ONCE(sqe->buf_index); return 0; } @@ -2853,48 +2987,49 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) } static void kiocb_done(struct kiocb *kiocb, ssize_t ret, - struct io_comp_state *cs) + unsigned int issue_flags) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); - struct io_async_rw *io = req->async_data; - - /* add 
previously done IO, if any */ - if (io && io->bytes_done > 0) { - if (ret < 0) - ret = io->bytes_done; - else - ret += io->bytes_done; - } if (req->flags & REQ_F_CUR_POS) req->file->f_pos = kiocb->ki_pos; - if (ret >= 0 && kiocb->ki_complete == io_complete_rw) - __io_complete_rw(req, ret, 0, cs); + if (ret >= 0 && (kiocb->ki_complete == io_complete_rw)) + __io_complete_rw(req, ret, 0, issue_flags); else io_rw_done(kiocb, ret); + + if (req->flags & REQ_F_REISSUE) { + req->flags &= ~REQ_F_REISSUE; + if (io_resubmit_prep(req)) { + io_req_task_queue_reissue(req); + } else { + unsigned int cflags = io_put_rw_kbuf(req); + struct io_ring_ctx *ctx = req->ctx; + + ret = io_fixup_rw_res(req, ret); + req_set_fail(req); + if (!(issue_flags & IO_URING_F_NONBLOCK)) { + mutex_lock(&ctx->uring_lock); + __io_req_complete(req, issue_flags, ret, cflags); + mutex_unlock(&ctx->uring_lock); + } else { + __io_req_complete(req, issue_flags, ret, cflags); + } + } + } } -static ssize_t io_import_fixed(struct io_kiocb *req, int rw, - struct iov_iter *iter) +static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, + struct io_mapped_ubuf *imu) { - struct io_ring_ctx *ctx = req->ctx; size_t len = req->rw.len; - struct io_mapped_ubuf *imu; - u16 index, buf_index = req->buf_index; + u64 buf_end, buf_addr = req->rw.addr; size_t offset; - u64 buf_addr; - if (unlikely(buf_index >= ctx->nr_user_bufs)) - return -EFAULT; - index = array_index_nospec(buf_index, ctx->nr_user_bufs); - imu = &ctx->user_bufs[index]; - buf_addr = req->rw.addr; - - /* overflow */ - if (buf_addr + len < buf_addr) + if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end))) return -EFAULT; /* not inside the mapped region */ - if (buf_addr < imu->ubuf || buf_addr + len > imu->ubuf + imu->len) + if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end)) return -EFAULT; /* @@ -2939,7 +3074,14 @@ static ssize_t io_import_fixed(struct io_kiocb *req, int rw, } } - return len; + return 0; +} + 
+static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) +{ + if (WARN_ON_ONCE(!req->imu)) + return -EFAULT; + return __io_import_fixed(req, rw, iter, req->imu); } static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock) @@ -3080,16 +3222,14 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, return __io_iov_buffer_select(req, iov, needs_lock); } -static ssize_t __io_import_iovec(int rw, struct io_kiocb *req, - struct iovec **iovec, struct iov_iter *iter, - bool needs_lock) +static int io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec, + struct iov_iter *iter, bool needs_lock) { void __user *buf = u64_to_user_ptr(req->rw.addr); size_t sqe_len = req->rw.len; + u8 opcode = req->opcode; ssize_t ret; - u8 opcode; - opcode = req->opcode; if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { *iovec = NULL; return io_import_fixed(req, rw, iter); @@ -3114,10 +3254,8 @@ static ssize_t __io_import_iovec(int rw, struct io_kiocb *req, if (req->flags & REQ_F_BUFFER_SELECT) { ret = io_iov_buffer_select(req, *iovec, needs_lock); - if (!ret) { - ret = (*iovec)->iov_len; - iov_iter_init(iter, rw, *iovec, 1, ret); - } + if (!ret) + iov_iter_init(iter, rw, *iovec, 1, (*iovec)->iov_len); *iovec = NULL; return ret; } @@ -3126,18 +3264,6 @@ static ssize_t __io_import_iovec(int rw, struct io_kiocb *req, req->ctx->compat); } -static ssize_t io_import_iovec(int rw, struct io_kiocb *req, - struct iovec **iovec, struct iov_iter *iter, - bool needs_lock) -{ - struct io_async_rw *iorw = req->async_data; - - if (!iorw) - return __io_import_iovec(rw, req, iovec, iter, needs_lock); - *iovec = NULL; - return 0; -} - static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb) { return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? 
NULL : &kiocb->ki_pos; @@ -3230,32 +3356,31 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec, } } -static inline int __io_alloc_async_data(struct io_kiocb *req) +static inline int io_alloc_async_data(struct io_kiocb *req) { WARN_ON_ONCE(!io_op_defs[req->opcode].async_size); req->async_data = kmalloc(io_op_defs[req->opcode].async_size, GFP_KERNEL); return req->async_data == NULL; } -static int io_alloc_async_data(struct io_kiocb *req) -{ - if (!io_op_defs[req->opcode].needs_async_data) - return 0; - - return __io_alloc_async_data(req); -} - static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, const struct iovec *fast_iov, struct iov_iter *iter, bool force) { - if (!force && !io_op_defs[req->opcode].needs_async_data) + if (!force && !io_op_defs[req->opcode].needs_async_setup) return 0; if (!req->async_data) { - if (__io_alloc_async_data(req)) + struct io_async_rw *iorw; + + if (io_alloc_async_data(req)) { + kfree(iovec); return -ENOMEM; + } io_req_map_rw(req, iovec, fast_iov, iter); + iorw = req->async_data; + /* we've copied and mapped the iter, ensure state is saved */ + iov_iter_save_state(&iorw->iter, &iorw->iter_state); } return 0; } @@ -3264,9 +3389,9 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw) { struct io_async_rw *iorw = req->async_data; struct iovec *iov = iorw->fast_iov; - ssize_t ret; + int ret; - ret = __io_import_iovec(rw, req, &iov, &iorw->iter, false); + ret = io_import_iovec(rw, req, &iov, &iorw->iter, false); if (unlikely(ret < 0)) return ret; @@ -3274,24 +3399,15 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw) iorw->free_iovec = iov; if (iov) req->flags |= REQ_F_NEED_CLEANUP; + iov_iter_save_state(&iorw->iter, &iorw->iter_state); return 0; } static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - ssize_t ret; - - ret = io_prep_rw(req, sqe); - if (ret) - return ret; - if (unlikely(!(req->file->f_mode & FMODE_READ))) return 
-EBADF; - - /* either don't need iovec imported or already have it */ - if (!req->async_data) - return 0; - return io_rw_prep_async(req, READ); + return io_prep_rw(req, sqe, READ); } /* @@ -3310,7 +3426,6 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, struct wait_page_queue *wpq; struct io_kiocb *req = wait->private; struct wait_page_key *key = arg; - int ret; wpq = container_of(wait, struct wait_page_queue, wait); @@ -3319,22 +3434,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, req->rw.kiocb.ki_flags &= ~IOCB_WAITQ; list_del_init(&wait->entry); - - init_task_work(&req->task_work, io_req_task_submit); - percpu_ref_get(&req->ctx->refs); - - /* submit ref gets dropped, acquire a new one */ - refcount_inc(&req->refs); - ret = io_req_task_work_add(req, true); - if (unlikely(ret)) { - struct task_struct *tsk; - - /* queue just for cancelation */ - init_task_work(&req->task_work, io_req_task_cancel); - tsk = io_wq_get_task(req->ctx->io_wq); - task_work_add(tsk, &req->task_work, TWA_NONE); - wake_up_process(tsk); - } + io_req_task_queue(req); return 1; } @@ -3381,7 +3481,7 @@ static bool io_rw_should_retry(struct io_kiocb *req) return true; } -static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) +static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) { if (req->file->f_op->read_iter) return call_read_iter(req->file, &req->rw.kiocb, iter); @@ -3391,27 +3491,40 @@ static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) return -EINVAL; } -static int io_read(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static bool need_read_all(struct io_kiocb *req) +{ + return req->flags & REQ_F_ISREG || + S_ISBLK(file_inode(req->file)->i_mode); +} + +static int io_read(struct io_kiocb *req, unsigned int issue_flags) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter __iter, 
*iter = &__iter; - struct iov_iter iter_cp; struct io_async_rw *rw = req->async_data; - ssize_t io_size, ret, ret2; - bool no_async; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + struct iov_iter_state __state, *state; + ssize_t ret, ret2; - if (rw) + if (rw) { iter = &rw->iter; - - ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); - if (ret < 0) - return ret; - iter_cp = *iter; - io_size = iov_iter_count(iter); - req->result = io_size; - ret = 0; + state = &rw->iter_state; + /* + * We come here from an earlier attempt, restore our state to + * match in case it doesn't. It's cheap enough that we don't + * need to make this conditional. + */ + iov_iter_restore(iter, state); + iovec = NULL; + } else { + ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); + if (ret < 0) + return ret; + state = &__state; + iov_iter_save_state(iter, state); + } + req->result = iov_iter_count(iter); /* Ensure we clear previously set non-block flag */ if (!force_nonblock) @@ -3419,127 +3532,130 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, else kiocb->ki_flags |= IOCB_NOWAIT; - /* If the file doesn't support async, just async punt */ - no_async = force_nonblock && !io_file_supports_async(req->file, READ); - if (no_async) - goto copy_iov; + if (force_nonblock && !io_file_supports_nowait(req, READ)) { + ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true); + return ret ?: -EAGAIN; + } - ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size); - if (unlikely(ret)) - goto out_free; + ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result); + if (unlikely(ret)) { + kfree(iovec); + return ret; + } ret = io_iter_do_read(req, iter); - if (!ret) { - goto done; - } else if (ret == -EIOCBQUEUED) { - ret = 0; - goto out_free; - } else if (ret == -EAGAIN) { + if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) { + req->flags &= ~REQ_F_REISSUE; /* IOPOLL retry should happen for io-wq threads */ if 
(!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL)) goto done; - /* no retry on NONBLOCK marked file */ - if (req->file->f_flags & O_NONBLOCK) + /* no retry on NONBLOCK nor RWF_NOWAIT */ + if (req->flags & REQ_F_NOWAIT) goto done; - /* some cases will consume bytes even on error returns */ - *iter = iter_cp; ret = 0; - goto copy_iov; - } else if (ret < 0) { - /* make sure -ERESTARTSYS -> -EINTR is done */ - goto done; - } - - /* read it all, or we did blocking attempt. no retry. */ - if (!iov_iter_count(iter) || !force_nonblock || - (req->file->f_flags & O_NONBLOCK) || !(req->flags & REQ_F_ISREG)) - goto done; - - io_size -= ret; -copy_iov: - ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true); - if (ret2) { - ret = ret2; + } else if (ret == -EIOCBQUEUED) { goto out_free; - } - if (no_async) - return -EAGAIN; - rw = req->async_data; - /* it's copied and will be cleaned with ->io */ - iovec = NULL; - /* now use our persistent iterator, if we aren't already */ - iter = &rw->iter; -retry: - rw->bytes_done += ret; - /* if we can retry, do so with the callbacks armed */ - if (!io_rw_should_retry(req)) { - kiocb->ki_flags &= ~IOCB_WAITQ; - return -EAGAIN; + } else if (ret <= 0 || ret == req->result || !force_nonblock || + (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) { + /* read all, failed, already did sync or don't want to retry */ + goto done; } /* - * Now retry read with the IOCB_WAITQ parts set in the iocb. If we - * get -EIOCBQUEUED, then we'll get a notification when the desired - * page gets unlocked. We can also get a partial read here, and if we - * do, then just retry at the new offset. + * Don't depend on the iter state matching what was consumed, or being + * untouched in case of error. Restore it and we'll advance it + * manually if we need to. 
*/ - ret = io_iter_do_read(req, iter); - if (ret == -EIOCBQUEUED) { - ret = 0; - goto out_free; - } else if (ret > 0 && ret < io_size) { + iov_iter_restore(iter, state); + + ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true); + if (ret2) + return ret2; + + iovec = NULL; + rw = req->async_data; + /* + * Now use our persistent iterator and state, if we aren't already. + * We've restored and mapped the iter to match. + */ + if (iter != &rw->iter) { + iter = &rw->iter; + state = &rw->iter_state; + } + + do { + /* + * We end up here because of a partial read, either from + * above or inside this loop. Advance the iter by the bytes + * that were consumed. + */ + iov_iter_advance(iter, ret); + if (!iov_iter_count(iter)) + break; + rw->bytes_done += ret; + iov_iter_save_state(iter, state); + + /* if we can retry, do so with the callbacks armed */ + if (!io_rw_should_retry(req)) { + kiocb->ki_flags &= ~IOCB_WAITQ; + return -EAGAIN; + } + + req->result = iov_iter_count(iter); + /* + * Now retry read with the IOCB_WAITQ parts set in the iocb. If + * we get -EIOCBQUEUED, then we'll get a notification when the + * desired page gets unlocked. We can also get a partial read + * here, and if we do, then just retry at the new offset. + */ + ret = io_iter_do_read(req, iter); + if (ret == -EIOCBQUEUED) + return 0; /* we got some bytes, but not all. retry. 
*/ kiocb->ki_flags &= ~IOCB_WAITQ; - goto retry; - } + iov_iter_restore(iter, state); + } while (ret > 0); done: - kiocb_done(kiocb, ret, cs); - ret = 0; + kiocb_done(kiocb, ret, issue_flags); out_free: - /* it's reportedly faster than delegating the null check to kfree() */ + /* it's faster to check here then delegate to kfree */ if (iovec) kfree(iovec); - return ret; + return 0; } static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - ssize_t ret; - - ret = io_prep_rw(req, sqe); - if (ret) - return ret; - if (unlikely(!(req->file->f_mode & FMODE_WRITE))) return -EBADF; - - /* either don't need iovec imported or already have it */ - if (!req->async_data) - return 0; - return io_rw_prep_async(req, WRITE); + return io_prep_rw(req, sqe, WRITE); } -static int io_write(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_write(struct io_kiocb *req, unsigned int issue_flags) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter __iter, *iter = &__iter; - struct iov_iter iter_cp; struct io_async_rw *rw = req->async_data; - ssize_t ret, ret2, io_size; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + struct iov_iter_state __state, *state; + ssize_t ret, ret2; - if (rw) + if (rw) { iter = &rw->iter; - - ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); - if (ret < 0) - return ret; - iter_cp = *iter; - io_size = iov_iter_count(iter); - req->result = io_size; + state = &rw->iter_state; + iov_iter_restore(iter, state); + iovec = NULL; + } else { + ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); + if (ret < 0) + return ret; + state = &__state; + iov_iter_save_state(iter, state); + } + req->result = iov_iter_count(iter); /* Ensure we clear previously set non-block flag */ if (!force_nonblock) @@ -3548,7 +3664,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, kiocb->ki_flags |= IOCB_NOWAIT; /* 
If the file doesn't support async, just async punt */ - if (force_nonblock && !io_file_supports_async(req->file, WRITE)) + if (force_nonblock && !io_file_supports_nowait(req, WRITE)) goto copy_iov; /* file path doesn't support NOWAIT for non-direct_IO */ @@ -3556,7 +3672,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, (req->flags & REQ_F_ISREG)) goto copy_iov; - ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size); + ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result); if (unlikely(ret)) goto out_free; @@ -3581,28 +3697,36 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, else ret2 = -EINVAL; + if (req->flags & REQ_F_REISSUE) { + req->flags &= ~REQ_F_REISSUE; + ret2 = -EAGAIN; + } + /* * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just * retry them without IOCB_NOWAIT. */ if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT)) ret2 = -EAGAIN; - /* no retry on NONBLOCK marked file */ - if (ret2 == -EAGAIN && (req->file->f_flags & O_NONBLOCK)) + /* no retry on NONBLOCK nor RWF_NOWAIT */ + if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT)) goto done; if (!force_nonblock || ret2 != -EAGAIN) { /* IOPOLL retry should happen for io-wq threads */ if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN) goto copy_iov; done: - kiocb_done(kiocb, ret2, cs); + kiocb_done(kiocb, ret2, issue_flags); } else { copy_iov: - /* some cases will consume bytes even on error returns */ - *iter = iter_cp; + iov_iter_restore(iter, state); ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); - if (!ret) + if (!ret) { + if (kiocb->ki_flags & IOCB_WRITE) + kiocb_end_write(req); return -EAGAIN; + } + return ret; } out_free: /* it's reportedly faster than delegating the null check to kfree() */ @@ -3611,37 +3735,160 @@ out_free: return ret; } +static int io_renameat_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_rename *ren = &req->rename; + const char __user 
*oldf, *newf; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + ren->old_dfd = READ_ONCE(sqe->fd); + oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); + newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + ren->new_dfd = READ_ONCE(sqe->len); + ren->flags = READ_ONCE(sqe->rename_flags); + + ren->oldpath = getname(oldf); + if (IS_ERR(ren->oldpath)) + return PTR_ERR(ren->oldpath); + + ren->newpath = getname(newf); + if (IS_ERR(ren->newpath)) { + putname(ren->oldpath); + return PTR_ERR(ren->newpath); + } + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_renameat(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_rename *ren = &req->rename; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd, + ren->newpath, ren->flags); + + req->flags &= ~REQ_F_NEED_CLEANUP; + if (ret < 0) + req_set_fail(req); + io_req_complete(req, ret); + return 0; +} + +static int io_unlinkat_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_unlink *un = &req->unlink; + const char __user *fname; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index || + sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + un->dfd = READ_ONCE(sqe->fd); + + un->flags = READ_ONCE(sqe->unlink_flags); + if (un->flags & ~AT_REMOVEDIR) + return -EINVAL; + + fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); + un->filename = getname(fname); + if (IS_ERR(un->filename)) + return PTR_ERR(un->filename); + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_unlink *un = &req->unlink; + int ret; + + if (issue_flags & 
IO_URING_F_NONBLOCK) + return -EAGAIN; + + if (un->flags & AT_REMOVEDIR) + ret = do_rmdir(un->dfd, un->filename); + else + ret = do_unlinkat(un->dfd, un->filename); + + req->flags &= ~REQ_F_NEED_CLEANUP; + if (ret < 0) + req_set_fail(req); + io_req_complete(req, ret); + return 0; +} + +static int io_shutdown_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ +#if defined(CONFIG_NET) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags || + sqe->buf_index || sqe->splice_fd_in)) + return -EINVAL; + + req->shutdown.how = READ_ONCE(sqe->len); + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) +{ +#if defined(CONFIG_NET) + struct socket *sock; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + sock = sock_from_file(req->file, &ret); + if (unlikely(!sock)) + return ret; + + ret = __sys_shutdown_sock(sock, req->shutdown.how); + if (ret < 0) + req_set_fail(req); + io_req_complete(req, ret); + return 0; +#else + return -EOPNOTSUPP; +#endif +} + static int __io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - struct io_splice* sp = &req->splice; + struct io_splice *sp = &req->splice; unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - sp->file_in = NULL; sp->len = READ_ONCE(sqe->len); sp->flags = READ_ONCE(sqe->splice_flags); - if (unlikely(sp->flags & ~valid_flags)) return -EINVAL; - - sp->file_in = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in), - (sp->flags & SPLICE_F_FD_IN_FIXED)); - if (!sp->file_in) - return -EBADF; - req->flags |= REQ_F_NEED_CLEANUP; - - if (!S_ISREG(file_inode(sp->file_in)->i_mode)) { - /* - * Splice operation will be punted aync, and here need to - * modify io_wq_work.flags, so initialize io_wq_work firstly. 
- */ - io_req_init_async(req); - req->work.flags |= IO_WQ_WORK_UNBOUND; - } - + sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); return 0; } @@ -3653,60 +3900,75 @@ static int io_tee_prep(struct io_kiocb *req, return __io_splice_prep(req, sqe); } -static int io_tee(struct io_kiocb *req, bool force_nonblock) +static int io_tee(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = &req->splice; - struct file *in = sp->file_in; struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; + struct file *in; long ret = 0; - if (force_nonblock) + if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; + + in = io_file_get(req->ctx, req, sp->splice_fd_in, + (sp->flags & SPLICE_F_FD_IN_FIXED)); + if (!in) { + ret = -EBADF; + goto done; + } + if (sp->len) ret = do_tee(in, out, sp->len, flags); - io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED)); - req->flags &= ~REQ_F_NEED_CLEANUP; - + if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) + io_put_file(in); +done: if (ret != sp->len) - req_set_fail_links(req); + req_set_fail(req); io_req_complete(req, ret); return 0; } static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - struct io_splice* sp = &req->splice; + struct io_splice *sp = &req->splice; sp->off_in = READ_ONCE(sqe->splice_off_in); sp->off_out = READ_ONCE(sqe->off); return __io_splice_prep(req, sqe); } -static int io_splice(struct io_kiocb *req, bool force_nonblock) +static int io_splice(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = &req->splice; - struct file *in = sp->file_in; struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; loff_t *poff_in, *poff_out; + struct file *in; long ret = 0; - if (force_nonblock) + if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; + in = io_file_get(req->ctx, req, sp->splice_fd_in, + (sp->flags & SPLICE_F_FD_IN_FIXED)); + if (!in) { + ret = -EBADF; + goto done; + } + poff_in = (sp->off_in == -1) ? 
NULL : &sp->off_in; poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; if (sp->len) ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); - io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED)); - req->flags &= ~REQ_F_NEED_CLEANUP; - + if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) + io_put_file(in); +done: if (ret != sp->len) - req_set_fail_links(req); + req_set_fail(req); io_req_complete(req, ret); return 0; } @@ -3714,24 +3976,21 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock) /* * IORING_OP_NOP just posts a completion event, nothing else. */ -static int io_nop(struct io_kiocb *req, struct io_comp_state *cs) +static int io_nop(struct io_kiocb *req, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - __io_req_complete(req, 0, 0, cs); + __io_req_complete(req, issue_flags, 0, 0); return 0; } -static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) +static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; - if (!req->file) - return -EBADF; - if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || @@ -3747,20 +4006,20 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static int io_fsync(struct io_kiocb *req, bool force_nonblock) +static int io_fsync(struct io_kiocb *req, unsigned int issue_flags) { loff_t end = req->sync.off + req->sync.len; int ret; /* fsync always requires a blocking context */ - if (force_nonblock) + if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; ret = vfs_fsync_range(req->file, req->sync.off, end > 0 ? 
end : LLONG_MAX, req->sync.flags & IORING_FSYNC_DATASYNC); if (ret < 0) - req_set_fail_links(req); + req_set_fail(req); io_req_complete(req, ret); return 0; } @@ -3780,17 +4039,19 @@ static int io_fallocate_prep(struct io_kiocb *req, return 0; } -static int io_fallocate(struct io_kiocb *req, bool force_nonblock) +static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags) { int ret; /* fallocate always requiring blocking context */ - if (force_nonblock) + if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, req->sync.len); if (ret < 0) - req_set_fail_links(req); + req_set_fail(req); + else + fsnotify_modify(req->file); io_req_complete(req, ret); return 0; } @@ -3800,7 +4061,9 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe const char __user *fname; int ret; - if (unlikely(sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (unlikely(sqe->ioprio || sqe->buf_index)) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; @@ -3817,20 +4080,21 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe req->open.filename = NULL; return ret; } + + req->open.file_slot = READ_ONCE(sqe->file_index); + if (req->open.file_slot && (req->open.how.flags & O_CLOEXEC)) + return -EINVAL; + req->open.nofile = rlimit(RLIMIT_NOFILE); - req->open.ignore_nonblock = false; req->flags |= REQ_F_NEED_CLEANUP; return 0; } static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - u64 flags, mode; + u64 mode = READ_ONCE(sqe->len); + u64 flags = READ_ONCE(sqe->open_flags); - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) - return -EINVAL; - mode = READ_ONCE(sqe->len); - flags = READ_ONCE(sqe->open_flags); req->open.how = build_open_how(flags, mode); return __io_openat_prep(req, sqe); } @@ -3841,8 +4105,6 @@ 
static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) size_t len; int ret; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) - return -EINVAL; how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); len = READ_ONCE(sqe->len); if (len < OPEN_HOW_SIZE_VER0) @@ -3856,58 +4118,75 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return __io_openat_prep(req, sqe); } -static int io_openat2(struct io_kiocb *req, bool force_nonblock) +static int io_openat2(struct io_kiocb *req, unsigned int issue_flags) { struct open_flags op; struct file *file; + bool resolve_nonblock, nonblock_set; + bool fixed = !!req->open.file_slot; int ret; - if (force_nonblock && !req->open.ignore_nonblock) - return -EAGAIN; - ret = build_open_flags(&req->open.how, &op); if (ret) goto err; + nonblock_set = op.open_flag & O_NONBLOCK; + resolve_nonblock = req->open.how.resolve & RESOLVE_CACHED; + if (issue_flags & IO_URING_F_NONBLOCK) { + /* + * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open, + * it'll always -EAGAIN + */ + if (req->open.how.flags & (O_TRUNC | O_CREAT | O_TMPFILE)) + return -EAGAIN; + op.lookup_flags |= LOOKUP_CACHED; + op.open_flag |= O_NONBLOCK; + } - ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile); - if (ret < 0) - goto err; + if (!fixed) { + ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile); + if (ret < 0) + goto err; + } file = do_filp_open(req->open.dfd, req->open.filename, &op); if (IS_ERR(file)) { - put_unused_fd(ret); - ret = PTR_ERR(file); /* - * A work-around to ensure that /proc/self works that way - * that it should - if we get -EOPNOTSUPP back, then assume - * that proc_self_get_link() failed us because we're in async - * context. We should be safe to retry this from the task - * itself with force_nonblock == false set, as it should not - * block on lookup. 
Would be nice to know this upfront and - * avoid the async dance, but doesn't seem feasible. + * We could hang on to this 'fd' on retrying, but seems like + * marginal gain for something that is now known to be a slower + * path. So just put it, and we'll get a new one when we retry. */ - if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) { - req->open.ignore_nonblock = true; - refcount_inc(&req->refs); - io_req_task_queue(req); - return 0; - } - } else { - fsnotify_open(file); - fd_install(ret, file); + if (!fixed) + put_unused_fd(ret); + + ret = PTR_ERR(file); + /* only retry if RESOLVE_CACHED wasn't already set by application */ + if (ret == -EAGAIN && + (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK))) + return -EAGAIN; + goto err; } + + if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set) + file->f_flags &= ~O_NONBLOCK; + fsnotify_open(file); + + if (!fixed) + fd_install(ret, file); + else + ret = io_install_fixed_file(req, file, issue_flags, + req->open.file_slot - 1); err: putname(req->open.filename); req->flags &= ~REQ_F_NEED_CLEANUP; if (ret < 0) - req_set_fail_links(req); - io_req_complete(req, ret); + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); return 0; } -static int io_openat(struct io_kiocb *req, bool force_nonblock) +static int io_openat(struct io_kiocb *req, unsigned int issue_flags) { - return io_openat2(req, force_nonblock); + return io_openat2(req, issue_flags); } static int io_remove_buffers_prep(struct io_kiocb *req, @@ -3948,6 +4227,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf, kfree(nxt); if (++i == nbufs) return i; + cond_resched(); } i++; kfree(buf); @@ -3956,13 +4236,13 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf, return i; } -static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags) { struct io_provide_buf *p 
= &req->pbuf; struct io_ring_ctx *ctx = req->ctx; struct io_buffer *head; int ret = 0; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; io_ring_submit_lock(ctx, !force_nonblock); @@ -3973,16 +4253,11 @@ static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock, if (head) ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs); if (ret < 0) - req_set_fail_links(req); + req_set_fail(req); - /* need to hold the lock to complete IOPOLL requests */ - if (ctx->flags & IORING_SETUP_IOPOLL) { - __io_req_complete(req, ret, 0, cs); - io_ring_submit_unlock(ctx, !force_nonblock); - } else { - io_ring_submit_unlock(ctx, !force_nonblock); - __io_req_complete(req, ret, 0, cs); - } + /* complete before unlock, IOPOLL may need the lock */ + __io_req_complete(req, issue_flags, ret, 0); + io_ring_submit_unlock(ctx, !force_nonblock); return 0; } @@ -4049,13 +4324,13 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head) return i ? i : -ENOMEM; } -static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) { struct io_provide_buf *p = &req->pbuf; struct io_ring_ctx *ctx = req->ctx; struct io_buffer *head, *list; int ret = 0; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; io_ring_submit_lock(ctx, !force_nonblock); @@ -4065,21 +4340,16 @@ static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock, ret = io_add_buffers(p, &head); if (ret >= 0 && !list) { - ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL); + ret = xa_insert(&ctx->io_buffers, p->bgid, head, + GFP_KERNEL_ACCOUNT); if (ret < 0) __io_remove_buffers(ctx, head, p->bgid, -1U); } if (ret < 0) - req_set_fail_links(req); - - /* need to hold the lock to complete IOPOLL requests */ - if (ctx->flags & IORING_SETUP_IOPOLL) { - __io_req_complete(req, ret, 0, cs); - io_ring_submit_unlock(ctx, !force_nonblock); - } else { - 
io_ring_submit_unlock(ctx, !force_nonblock); - __io_req_complete(req, ret, 0, cs); - } + req_set_fail(req); + /* complete before unlock, IOPOLL may need the lock */ + __io_req_complete(req, issue_flags, ret, 0); + io_ring_submit_unlock(ctx, !force_nonblock); return 0; } @@ -4089,7 +4359,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req, #if defined(CONFIG_EPOLL) if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; req->epoll.epfd = READ_ONCE(sqe->fd); @@ -4110,20 +4380,20 @@ static int io_epoll_ctl_prep(struct io_kiocb *req, #endif } -static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags) { #if defined(CONFIG_EPOLL) struct io_epoll *ie = &req->epoll; int ret; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock); if (force_nonblock && ret == -EAGAIN) return -EAGAIN; if (ret < 0) - req_set_fail_links(req); - __io_req_complete(req, ret, 0, cs); + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); return 0; #else return -EOPNOTSUPP; @@ -4147,18 +4417,18 @@ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) #endif } -static int io_madvise(struct io_kiocb *req, bool force_nonblock) +static int io_madvise(struct io_kiocb *req, unsigned int issue_flags) { #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) struct io_madvise *ma = &req->madvise; int ret; - if (force_nonblock) + if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice); if (ret < 0) - req_set_fail_links(req); + req_set_fail(req); io_req_complete(req, ret); return 0; #else @@ -4179,12 +4449,12 @@ static int io_fadvise_prep(struct io_kiocb 
*req, const struct io_uring_sqe *sqe) return 0; } -static int io_fadvise(struct io_kiocb *req, bool force_nonblock) +static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags) { struct io_fadvise *fa = &req->fadvise; int ret; - if (force_nonblock) { + if (issue_flags & IO_URING_F_NONBLOCK) { switch (fa->advice) { case POSIX_FADV_NORMAL: case POSIX_FADV_RANDOM: @@ -4197,14 +4467,14 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock) ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice); if (ret < 0) - req_set_fail_links(req); - io_req_complete(req, ret); + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); return 0; } static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; @@ -4220,89 +4490,96 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static int io_statx(struct io_kiocb *req, bool force_nonblock) +static int io_statx(struct io_kiocb *req, unsigned int issue_flags) { struct io_statx *ctx = &req->statx; int ret; - if (force_nonblock) + if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask, ctx->buffer); if (ret < 0) - req_set_fail_links(req); + req_set_fail(req); io_req_complete(req, ret); return 0; } static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - /* - * If we queue this for async, it must not be cancellable. That would - * leave the 'file' in an undeterminate state, and here need to modify - * io_wq_work.flags, so initialize io_wq_work firstly. 
- */ - io_req_init_async(req); - - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || - sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) + sqe->rw_flags || sqe->buf_index) return -EINVAL; if (req->flags & REQ_F_FIXED_FILE) return -EBADF; req->close.fd = READ_ONCE(sqe->fd); - if ((req->file && req->file->f_op == &io_uring_fops)) - return -EBADF; + req->close.file_slot = READ_ONCE(sqe->file_index); + if (req->close.file_slot && req->close.fd) + return -EINVAL; - req->close.put_file = NULL; return 0; } -static int io_close(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_close(struct io_kiocb *req, unsigned int issue_flags) { + struct files_struct *files = current->files; struct io_close *close = &req->close; - int ret; + struct fdtable *fdt; + struct file *file = NULL; + int ret = -EBADF; - /* might be already done during nonblock submission */ - if (!close->put_file) { - ret = close_fd_get_file(close->fd, &close->put_file); - if (ret < 0) - return (ret == -ENOENT) ? 
-EBADF : ret; + if (req->close.file_slot) { + ret = io_close_fixed(req, issue_flags); + goto err; + } + + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + if (close->fd >= fdt->max_fds) { + spin_unlock(&files->file_lock); + goto err; + } + file = fdt->fd[close->fd]; + if (!file || file->f_op == &io_uring_fops) { + spin_unlock(&files->file_lock); + file = NULL; + goto err; } /* if the file has a flush method, be safe and punt to async */ - if (close->put_file->f_op->flush && force_nonblock) { - /* not safe to cancel at this point */ - req->work.flags |= IO_WQ_WORK_NO_CANCEL; - /* was never set, but play safe */ - req->flags &= ~REQ_F_NOWAIT; - /* avoid grabbing files - we don't need the files */ - req->flags |= REQ_F_NO_FILE_TABLE; + if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) { + spin_unlock(&files->file_lock); return -EAGAIN; } + ret = __close_fd_get_file(close->fd, &file); + spin_unlock(&files->file_lock); + if (ret < 0) { + if (ret == -ENOENT) + ret = -EBADF; + goto err; + } + /* No ->flush() or already async, safely close from here */ - ret = filp_close(close->put_file, req->work.identity->files); + ret = filp_close(file, current->files); +err: if (ret < 0) - req_set_fail_links(req); - fput(close->put_file); - close->put_file = NULL; - __io_req_complete(req, ret, 0, cs); + req_set_fail(req); + if (file) + fput(file); + __io_req_complete(req, issue_flags, ret, 0); return 0; } -static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) +static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; - if (!req->file) - return -EBADF; - if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || @@ -4315,18 +4592,18 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock) +static int 
io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags) { int ret; /* sync_file_range always requires a blocking context */ - if (force_nonblock) + if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; ret = sync_file_range(req->file, req->sync.off, req->sync.len, req->sync.flags); if (ret < 0) - req_set_fail_links(req); + req_set_fail(req); io_req_complete(req, ret); return 0; } @@ -4340,55 +4617,65 @@ static int io_setup_async_msg(struct io_kiocb *req, if (async_msg) return -EAGAIN; if (io_alloc_async_data(req)) { - if (kmsg->iov != kmsg->fast_iov) - kfree(kmsg->iov); + kfree(kmsg->free_iov); return -ENOMEM; } async_msg = req->async_data; req->flags |= REQ_F_NEED_CLEANUP; memcpy(async_msg, kmsg, sizeof(*kmsg)); + if (async_msg->msg.msg_name) + async_msg->msg.msg_name = &async_msg->addr; + /* if were using fast_iov, set it to the new one */ + if (!async_msg->free_iov) + async_msg->msg.msg_iter.iov = async_msg->fast_iov; + return -EAGAIN; } static int io_sendmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { - iomsg->iov = iomsg->fast_iov; iomsg->msg.msg_name = &iomsg->addr; + iomsg->free_iov = iomsg->fast_iov; return sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg, - req->sr_msg.msg_flags, &iomsg->iov); + req->sr_msg.msg_flags, &iomsg->free_iov); } -static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +static int io_sendmsg_prep_async(struct io_kiocb *req) { - struct io_async_msghdr *async_msg = req->async_data; - struct io_sr_msg *sr = &req->sr_msg; int ret; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - if (unlikely(sqe->addr2 || sqe->splice_fd_in || sqe->ioprio)) - return -EINVAL; - - sr->msg_flags = READ_ONCE(sqe->msg_flags); - sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); - sr->len = READ_ONCE(sqe->len); - -#ifdef CONFIG_COMPAT - if (req->ctx->compat) - sr->msg_flags |= MSG_CMSG_COMPAT; -#endif - - if (!async_msg || !io_op_defs[req->opcode].needs_async_data) - return 0; 
- ret = io_sendmsg_copy_hdr(req, async_msg); + ret = io_sendmsg_copy_hdr(req, req->async_data); if (!ret) req->flags |= REQ_F_NEED_CLEANUP; return ret; } -static int io_sendmsg(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_sr_msg *sr = &req->sr_msg; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (unlikely(sqe->addr2 || sqe->file_index)) + return -EINVAL; + if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio)) + return -EINVAL; + + sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); + sr->len = READ_ONCE(sqe->len); + sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; + if (sr->msg_flags & MSG_DONTWAIT) + req->flags |= REQ_F_NOWAIT; + +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + sr->msg_flags |= MSG_CMSG_COMPAT; +#endif + return 0; +} + +static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr iomsg, *kmsg; struct socket *sock; @@ -4400,46 +4687,37 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock, if (unlikely(!sock)) return ret; - if (req->async_data) { - kmsg = req->async_data; - kmsg->msg.msg_name = &kmsg->addr; - /* if iov is set, it's allocated already */ - if (!kmsg->iov) - kmsg->iov = kmsg->fast_iov; - kmsg->msg.msg_iter.iov = kmsg->iov; - } else { + kmsg = req->async_data; + if (!kmsg) { ret = io_sendmsg_copy_hdr(req, &iomsg); if (ret) return ret; kmsg = &iomsg; } - flags = req->sr_msg.msg_flags | MSG_NOSIGNAL; - if (flags & MSG_DONTWAIT) - req->flags |= REQ_F_NOWAIT; - else if (force_nonblock) + flags = req->sr_msg.msg_flags; + if (issue_flags & IO_URING_F_NONBLOCK) flags |= MSG_DONTWAIT; - if (flags & MSG_WAITALL) min_ret = iov_iter_count(&kmsg->msg.msg_iter); ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); - if (force_nonblock && ret == -EAGAIN) + if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) return 
io_setup_async_msg(req, kmsg); if (ret == -ERESTARTSYS) ret = -EINTR; - if (kmsg->iov != kmsg->fast_iov) - kfree(kmsg->iov); + /* fast path, check for non-NULL to avoid function call */ + if (kmsg->free_iov) + kfree(kmsg->free_iov); req->flags &= ~REQ_F_NEED_CLEANUP; if (ret < min_ret) - req_set_fail_links(req); - __io_req_complete(req, ret, 0, cs); + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); return 0; } -static int io_send(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_send(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = &req->sr_msg; struct msghdr msg; @@ -4462,25 +4740,22 @@ static int io_send(struct io_kiocb *req, bool force_nonblock, msg.msg_controllen = 0; msg.msg_namelen = 0; - flags = req->sr_msg.msg_flags | MSG_NOSIGNAL; - if (flags & MSG_DONTWAIT) - req->flags |= REQ_F_NOWAIT; - else if (force_nonblock) + flags = req->sr_msg.msg_flags; + if (issue_flags & IO_URING_F_NONBLOCK) flags |= MSG_DONTWAIT; - if (flags & MSG_WAITALL) min_ret = iov_iter_count(&msg.msg_iter); msg.msg_flags = flags; ret = sock_sendmsg(sock, &msg); - if (force_nonblock && ret == -EAGAIN) + if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) return -EAGAIN; if (ret == -ERESTARTSYS) ret = -EINTR; if (ret < min_ret) - req_set_fail_links(req); - __io_req_complete(req, ret, 0, cs); + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); return 0; } @@ -4500,15 +4775,14 @@ static int __io_recvmsg_copy_hdr(struct io_kiocb *req, if (req->flags & REQ_F_BUFFER_SELECT) { if (iov_len > 1) return -EINVAL; - if (copy_from_user(iomsg->iov, uiov, sizeof(*uiov))) + if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov))) return -EFAULT; - sr->len = iomsg->iov[0].iov_len; - iov_iter_init(&iomsg->msg.msg_iter, READ, iomsg->iov, 1, - sr->len); - iomsg->iov = NULL; + sr->len = iomsg->fast_iov[0].iov_len; + iomsg->free_iov = NULL; } else { + iomsg->free_iov = iomsg->fast_iov; ret = 
__import_iovec(READ, uiov, iov_len, UIO_FASTIOV, - &iomsg->iov, &iomsg->msg.msg_iter, + &iomsg->free_iov, &iomsg->msg.msg_iter, false); if (ret > 0) ret = 0; @@ -4521,16 +4795,14 @@ static int __io_recvmsg_copy_hdr(struct io_kiocb *req, static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { - struct compat_msghdr __user *msg_compat; struct io_sr_msg *sr = &req->sr_msg; struct compat_iovec __user *uiov; compat_uptr_t ptr; compat_size_t len; int ret; - msg_compat = (struct compat_msghdr __user *) sr->umsg; - ret = __get_compat_msghdr(&iomsg->msg, msg_compat, &iomsg->uaddr, - &ptr, &len); + ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr, + &ptr, &len); if (ret) return ret; @@ -4547,11 +4819,11 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, if (clen < 0) return -EINVAL; sr->len = clen; - iomsg->iov[0].iov_len = clen; - iomsg->iov = NULL; + iomsg->free_iov = NULL; } else { + iomsg->free_iov = iomsg->fast_iov; ret = __import_iovec(READ, (struct iovec __user *)uiov, len, - UIO_FASTIOV, &iomsg->iov, + UIO_FASTIOV, &iomsg->free_iov, &iomsg->msg.msg_iter, true); if (ret < 0) return ret; @@ -4565,7 +4837,6 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { iomsg->msg.msg_name = &iomsg->addr; - iomsg->iov = iomsg->fast_iov; #ifdef CONFIG_COMPAT if (req->ctx->compat) @@ -4595,38 +4866,42 @@ static inline unsigned int io_put_recv_kbuf(struct io_kiocb *req) return io_put_kbuf(req, req->sr_msg.kbuf); } -static int io_recvmsg_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) +static int io_recvmsg_prep_async(struct io_kiocb *req) { - struct io_async_msghdr *async_msg = req->async_data; - struct io_sr_msg *sr = &req->sr_msg; int ret; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - if (unlikely(sqe->addr2 || sqe->splice_fd_in || sqe->ioprio)) - return -EINVAL; - - sr->msg_flags = READ_ONCE(sqe->msg_flags); - sr->umsg = 
u64_to_user_ptr(READ_ONCE(sqe->addr)); - sr->len = READ_ONCE(sqe->len); - sr->bgid = READ_ONCE(sqe->buf_group); - -#ifdef CONFIG_COMPAT - if (req->ctx->compat) - sr->msg_flags |= MSG_CMSG_COMPAT; -#endif - - if (!async_msg || !io_op_defs[req->opcode].needs_async_data) - return 0; - ret = io_recvmsg_copy_hdr(req, async_msg); + ret = io_recvmsg_copy_hdr(req, req->async_data); if (!ret) req->flags |= REQ_F_NEED_CLEANUP; return ret; } -static int io_recvmsg(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_sr_msg *sr = &req->sr_msg; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (unlikely(sqe->addr2 || sqe->file_index)) + return -EINVAL; + if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio)) + return -EINVAL; + + sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); + sr->len = READ_ONCE(sqe->len); + sr->bgid = READ_ONCE(sqe->buf_group); + sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; + if (sr->msg_flags & MSG_DONTWAIT) + req->flags |= REQ_F_NOWAIT; + +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + sr->msg_flags |= MSG_CMSG_COMPAT; +#endif + return 0; +} + +static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr iomsg, *kmsg; struct socket *sock; @@ -4634,19 +4909,14 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock, unsigned flags; int min_ret = 0; int ret, cflags = 0; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; sock = sock_from_file(req->file, &ret); if (unlikely(!sock)) return ret; - if (req->async_data) { - kmsg = req->async_data; - kmsg->msg.msg_name = &kmsg->addr; - /* if iov is set, it's allocated already */ - if (!kmsg->iov) - kmsg->iov = kmsg->fast_iov; - kmsg->msg.msg_iter.iov = kmsg->iov; - } else { + kmsg = req->async_data; + if (!kmsg) { ret = io_recvmsg_copy_hdr(req, &iomsg); if (ret) return ret; @@ -4658,16 +4928,14 @@ 
static int io_recvmsg(struct io_kiocb *req, bool force_nonblock, if (IS_ERR(kbuf)) return PTR_ERR(kbuf); kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr); - iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->iov, + kmsg->fast_iov[0].iov_len = req->sr_msg.len; + iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1, req->sr_msg.len); } - flags = req->sr_msg.msg_flags | MSG_NOSIGNAL; - if (flags & MSG_DONTWAIT) - req->flags |= REQ_F_NOWAIT; - else if (force_nonblock) + flags = req->sr_msg.msg_flags; + if (force_nonblock) flags |= MSG_DONTWAIT; - if (flags & MSG_WAITALL) min_ret = iov_iter_count(&kmsg->msg.msg_iter); @@ -4680,17 +4948,17 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock, if (req->flags & REQ_F_BUFFER_SELECTED) cflags = io_put_recv_kbuf(req); - if (kmsg->iov != kmsg->fast_iov) - kfree(kmsg->iov); + /* fast path, check for non-NULL to avoid function call */ + if (kmsg->free_iov) + kfree(kmsg->free_iov); req->flags &= ~REQ_F_NEED_CLEANUP; if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) - req_set_fail_links(req); - __io_req_complete(req, ret, cflags, cs); + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, cflags); return 0; } -static int io_recv(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_recv(struct io_kiocb *req, unsigned int issue_flags) { struct io_buffer *kbuf; struct io_sr_msg *sr = &req->sr_msg; @@ -4701,6 +4969,7 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock, unsigned flags; int min_ret = 0; int ret, cflags = 0; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; sock = sock_from_file(req->file, &ret); if (unlikely(!sock)) @@ -4724,12 +4993,9 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock, msg.msg_iocb = NULL; msg.msg_flags = 0; - flags = req->sr_msg.msg_flags | MSG_NOSIGNAL; - if (flags & MSG_DONTWAIT) - req->flags |= REQ_F_NOWAIT; - else if (force_nonblock) + flags = 
req->sr_msg.msg_flags; + if (force_nonblock) flags |= MSG_DONTWAIT; - if (flags & MSG_WAITALL) min_ret = iov_iter_count(&msg.msg_iter); @@ -4742,8 +5008,8 @@ out_free: if (req->flags & REQ_F_BUFFER_SELECTED) cflags = io_put_recv_kbuf(req); if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) - req_set_fail_links(req); - __io_req_complete(req, ret, cflags, cs); + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, cflags); return 0; } @@ -4751,48 +5017,79 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_accept *accept = &req->accept; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->splice_fd_in) + if (sqe->ioprio || sqe->len || sqe->buf_index) return -EINVAL; accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); accept->flags = READ_ONCE(sqe->accept_flags); accept->nofile = rlimit(RLIMIT_NOFILE); + + accept->file_slot = READ_ONCE(sqe->file_index); + if (accept->file_slot && (accept->flags & SOCK_CLOEXEC)) + return -EINVAL; + if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) + accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; return 0; } -static int io_accept(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_accept(struct io_kiocb *req, unsigned int issue_flags) { struct io_accept *accept = &req->accept; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; unsigned int file_flags = force_nonblock ? 
O_NONBLOCK : 0; - int ret; + bool fixed = !!accept->file_slot; + struct file *file; + int ret, fd; if (req->file->f_flags & O_NONBLOCK) req->flags |= REQ_F_NOWAIT; - ret = __sys_accept4_file(req->file, file_flags, accept->addr, - accept->addr_len, accept->flags, - accept->nofile); - if (ret == -EAGAIN && force_nonblock) - return -EAGAIN; - if (ret < 0) { + if (!fixed) { + fd = __get_unused_fd_flags(accept->flags, accept->nofile); + if (unlikely(fd < 0)) + return fd; + } + file = do_accept(req->file, file_flags, accept->addr, accept->addr_len, + accept->flags); + + if (IS_ERR(file)) { + if (!fixed) + put_unused_fd(fd); + ret = PTR_ERR(file); + if (ret == -EAGAIN && force_nonblock) + return -EAGAIN; if (ret == -ERESTARTSYS) ret = -EINTR; - req_set_fail_links(req); + req_set_fail(req); + } else if (!fixed) { + fd_install(fd, file); + ret = fd; + } else { + ret = io_install_fixed_file(req, file, issue_flags, + accept->file_slot - 1); } - __io_req_complete(req, ret, 0, cs); + __io_req_complete(req, issue_flags, ret, 0); return 0; } +static int io_connect_prep_async(struct io_kiocb *req) +{ + struct io_async_connect *io = req->async_data; + struct io_connect *conn = &req->connect; + + return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address); +} + static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_connect *conn = &req->connect; - struct io_async_connect *io = req->async_data; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) @@ -4800,20 +5097,15 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); conn->addr_len = READ_ONCE(sqe->addr2); - - if (!io) - return 0; - - return move_addr_to_kernel(conn->addr, conn->addr_len, - &io->address); + return 0; } -static 
int io_connect(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_connect(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_connect __io, *io; unsigned file_flags; int ret; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; if (req->async_data) { io = req->async_data; @@ -4837,7 +5129,6 @@ static int io_connect(struct io_kiocb *req, bool force_nonblock, ret = -ENOMEM; goto out; } - io = req->async_data; memcpy(req->async_data, &__io, sizeof(__io)); return -EAGAIN; } @@ -4845,67 +5136,37 @@ static int io_connect(struct io_kiocb *req, bool force_nonblock, ret = -EINTR; out: if (ret < 0) - req_set_fail_links(req); - __io_req_complete(req, ret, 0, cs); + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); return 0; } #else /* !CONFIG_NET */ -static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - return -EOPNOTSUPP; +#define IO_NETOP_FN(op) \ +static int io_##op(struct io_kiocb *req, unsigned int issue_flags) \ +{ \ + return -EOPNOTSUPP; \ } -static int io_sendmsg(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) -{ - return -EOPNOTSUPP; +#define IO_NETOP_PREP(op) \ +IO_NETOP_FN(op) \ +static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \ +{ \ + return -EOPNOTSUPP; \ +} \ + +#define IO_NETOP_PREP_ASYNC(op) \ +IO_NETOP_PREP(op) \ +static int io_##op##_prep_async(struct io_kiocb *req) \ +{ \ + return -EOPNOTSUPP; \ } -static int io_send(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) -{ - return -EOPNOTSUPP; -} - -static int io_recvmsg_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - return -EOPNOTSUPP; -} - -static int io_recvmsg(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) -{ - return -EOPNOTSUPP; -} - -static int io_recv(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) -{ - return -EOPNOTSUPP; -} - -static int io_accept_prep(struct 
io_kiocb *req, const struct io_uring_sqe *sqe) -{ - return -EOPNOTSUPP; -} - -static int io_accept(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) -{ - return -EOPNOTSUPP; -} - -static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - return -EOPNOTSUPP; -} - -static int io_connect(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) -{ - return -EOPNOTSUPP; -} +IO_NETOP_PREP_ASYNC(sendmsg); +IO_NETOP_PREP_ASYNC(recvmsg); +IO_NETOP_PREP_ASYNC(connect); +IO_NETOP_PREP(accept); +IO_NETOP_FN(send); +IO_NETOP_FN(recv); #endif /* CONFIG_NET */ struct io_poll_table { @@ -4915,68 +5176,47 @@ struct io_poll_table { int error; }; -static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, - __poll_t mask, task_work_func_t func) +#define IO_POLL_CANCEL_FLAG BIT(31) +#define IO_POLL_RETRY_FLAG BIT(30) +#define IO_POLL_REF_MASK GENMASK(29, 0) + +/* + * We usually have 1-2 refs taken, 128 is more than enough and we want to + * maximise the margin between this amount and the moment when it overflows. + */ +#define IO_POLL_REF_BIAS 128 + +static bool io_poll_get_ownership_slowpath(struct io_kiocb *req) { - bool twa_signal_ok; - int ret; - - /* for instances that support it check for an event match first: */ - if (mask && !(mask & poll->events)) - return 0; - - trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask); - - list_del_init(&poll->wait.entry); - - req->result = mask; - init_task_work(&req->task_work, func); - percpu_ref_get(&req->ctx->refs); + int v; /* - * If we using the signalfd wait_queue_head for this wakeup, then - * it's not safe to use TWA_SIGNAL as we could be recursing on the - * tsk->sighand->siglock on doing the wakeup. Should not be needed - * either, as the normal wakeup will suffice. + * poll_refs are already elevated and we don't have much hope for + * grabbing the ownership. 
Instead of incrementing set a retry flag + * to notify the loop that there might have been some change. */ - twa_signal_ok = (poll->head != &req->task->sighand->signalfd_wqh); - - /* - * If this fails, then the task is exiting. When a task exits, the - * work gets canceled, so just cancel this request as well instead - * of executing it. We can't safely execute it anyway, as we may not - * have the needed state needed for it anyway. - */ - ret = io_req_task_work_add(req, twa_signal_ok); - if (unlikely(ret)) { - struct task_struct *tsk; - - WRITE_ONCE(poll->canceled, true); - tsk = io_wq_get_task(req->ctx->io_wq); - task_work_add(tsk, &req->task_work, TWA_NONE); - wake_up_process(tsk); - } - return 1; + v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs); + if (v & IO_POLL_REF_MASK) + return false; + return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); } -static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll) - __acquires(&req->ctx->completion_lock) +/* + * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can + * bump it and acquire ownership. It's disallowed to modify requests while not + * owning it, that prevents from races for enqueueing task_work's and b/w + * arming poll and wakeups. 
+ */ +static inline bool io_poll_get_ownership(struct io_kiocb *req) { - struct io_ring_ctx *ctx = req->ctx; + if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) + return io_poll_get_ownership_slowpath(req); + return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); +} - if (!req->result && !READ_ONCE(poll->canceled)) { - struct poll_table_struct pt = { ._key = poll->events }; - - req->result = vfs_poll(req->file, &pt) & poll->events; - } - - spin_lock_irq(&ctx->completion_lock); - if (!req->result && !READ_ONCE(poll->canceled)) { - add_wait_queue(poll->head, &poll->wait); - return true; - } - - return false; +static void io_poll_mark_cancelled(struct io_kiocb *req) +{ + atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs); } static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req) @@ -4994,99 +5234,254 @@ static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req) return &req->apoll->poll; } -static void io_poll_remove_double(struct io_kiocb *req) -{ - struct io_poll_iocb *poll = io_poll_get_double(req); - - lockdep_assert_held(&req->ctx->completion_lock); - - if (poll && poll->head) { - struct wait_queue_head *head = poll->head; - - spin_lock(&head->lock); - list_del_init(&poll->wait.entry); - if (poll->wait.private) - refcount_dec(&req->refs); - poll->head = NULL; - spin_unlock(&head->lock); - } -} - -static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error) +static void io_poll_req_insert(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; + struct hlist_head *list; - io_poll_remove_double(req); - req->poll.done = true; - io_cqring_fill_event(req, error ? 
error : mangle_poll(mask)); - io_commit_cqring(ctx); -} - -static void io_poll_task_func(struct callback_head *cb) -{ - struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); - struct io_ring_ctx *ctx = req->ctx; - struct io_kiocb *nxt; - - if (io_poll_rewait(req, &req->poll)) { - spin_unlock_irq(&ctx->completion_lock); - } else { - hash_del(&req->hash_node); - io_poll_complete(req, req->result, 0); - spin_unlock_irq(&ctx->completion_lock); - - nxt = io_put_req_find_next(req); - io_cqring_ev_posted(ctx); - if (nxt) - __io_req_task_submit(nxt); - } - - percpu_ref_put(&ctx->refs); -} - -static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, - int sync, void *key) -{ - struct io_kiocb *req = wait->private; - struct io_poll_iocb *poll = io_poll_get_single(req); - __poll_t mask = key_to_poll(key); - - /* for instances that support it check for an event match first: */ - if (mask && !(mask & poll->events)) - return 0; - - list_del_init(&wait->entry); - - if (poll && poll->head) { - bool done; - - spin_lock(&poll->head->lock); - done = list_empty(&poll->wait.entry); - if (!done) - list_del_init(&poll->wait.entry); - /* make sure double remove sees this as being gone */ - wait->private = NULL; - spin_unlock(&poll->head->lock); - if (!done) { - /* use wait func handler, so it matches the rq type */ - poll->wait.func(&poll->wait, mode, sync, key); - } - } - refcount_dec(&req->refs); - return 1; + list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)]; + hlist_add_head(&req->hash_node, list); } static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, wait_queue_func_t wake_func) { poll->head = NULL; - poll->done = false; - poll->canceled = false; - poll->events = events; +#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP) + /* mask in events that we always want/need */ + poll->events = events | IO_POLL_UNMASK; INIT_LIST_HEAD(&poll->wait.entry); init_waitqueue_func_entry(&poll->wait, 
wake_func); } +static inline void io_poll_remove_entry(struct io_poll_iocb *poll) +{ + struct wait_queue_head *head = smp_load_acquire(&poll->head); + + if (head) { + spin_lock_irq(&head->lock); + list_del_init(&poll->wait.entry); + poll->head = NULL; + spin_unlock_irq(&head->lock); + } +} + +static void io_poll_remove_entries(struct io_kiocb *req) +{ + struct io_poll_iocb *poll = io_poll_get_single(req); + struct io_poll_iocb *poll_double = io_poll_get_double(req); + + /* + * While we hold the waitqueue lock and the waitqueue is nonempty, + * wake_up_pollfree() will wait for us. However, taking the waitqueue + * lock in the first place can race with the waitqueue being freed. + * + * We solve this as eventpoll does: by taking advantage of the fact that + * all users of wake_up_pollfree() will RCU-delay the actual free. If + * we enter rcu_read_lock() and see that the pointer to the queue is + * non-NULL, we can then lock it without the memory being freed out from + * under us. + * + * Keep holding rcu_read_lock() as long as we hold the queue lock, in + * case the caller deletes the entry from the queue, leaving it empty. + * In that case, only RCU prevents the queue memory from being freed. + */ + rcu_read_lock(); + io_poll_remove_entry(poll); + if (poll_double) + io_poll_remove_entry(poll_double); + rcu_read_unlock(); +} + +/* + * All poll tw should go through this. Checks for poll events, manages + * references, does rewait, etc. + * + * Returns a negative error on failure. >0 when no action require, which is + * either spurious wakeup or multishot CQE is served. 0 when it's done with + * the request, then the mask is stored in req->result. 
+ */ +static int io_poll_check_events(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_poll_iocb *poll = io_poll_get_single(req); + int v; + + /* req->task == current here, checking PF_EXITING is safe */ + if (unlikely(req->task->flags & PF_EXITING)) + io_poll_mark_cancelled(req); + + do { + v = atomic_read(&req->poll_refs); + + /* tw handler should be the owner, and so have some references */ + if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK))) + return 0; + if (v & IO_POLL_CANCEL_FLAG) + return -ECANCELED; + /* + * cqe.res contains only events of the first wake up + * and all others are lost. Redo vfs_poll() to get + * up to date state. + */ + if ((v & IO_POLL_REF_MASK) != 1) + req->result = 0; + if (v & IO_POLL_RETRY_FLAG) { + req->result = 0; + /* + * We won't find new events that came in between + * vfs_poll and the ref put unless we clear the + * flag in advance. + */ + atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); + v &= ~IO_POLL_RETRY_FLAG; + } + + if (!req->result) { + struct poll_table_struct pt = { ._key = poll->events }; + + req->result = vfs_poll(req->file, &pt) & poll->events; + } + + /* multishot, just fill a CQE and proceed */ + if (req->result && !(poll->events & EPOLLONESHOT)) { + __poll_t mask = mangle_poll(req->result & poll->events); + bool filled; + + spin_lock(&ctx->completion_lock); + filled = io_fill_cqe_aux(ctx, req->user_data, mask, + IORING_CQE_F_MORE); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (unlikely(!filled)) + return -ECANCELED; + io_cqring_ev_posted(ctx); + } else if (req->result) { + return 0; + } + + /* force the next iteration to vfs_poll() */ + req->result = 0; + + /* + * Release all references, retry if someone tried to restart + * task_work while we were executing it.
+ */ + } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) & + IO_POLL_REF_MASK); + + return 1; +} + +static void io_poll_task_func(struct io_kiocb *req, bool *locked) +{ + struct io_ring_ctx *ctx = req->ctx; + int ret; + + ret = io_poll_check_events(req); + if (ret > 0) + return; + + if (!ret) { + req->result = mangle_poll(req->result & req->poll.events); + } else { + req->result = ret; + req_set_fail(req); + } + + io_poll_remove_entries(req); + spin_lock(&ctx->completion_lock); + hash_del(&req->hash_node); + spin_unlock(&ctx->completion_lock); + io_req_complete_post(req, req->result, 0); +} + +static void io_apoll_task_func(struct io_kiocb *req, bool *locked) +{ + struct io_ring_ctx *ctx = req->ctx; + int ret; + + ret = io_poll_check_events(req); + if (ret > 0) + return; + + io_poll_remove_entries(req); + spin_lock(&ctx->completion_lock); + hash_del(&req->hash_node); + spin_unlock(&ctx->completion_lock); + + if (!ret) + io_req_task_submit(req, locked); + else + io_req_complete_failed(req, ret); +} + +static void __io_poll_execute(struct io_kiocb *req, int mask) +{ + req->result = mask; + if (req->opcode == IORING_OP_POLL_ADD) + req->io_task_work.func = io_poll_task_func; + else + req->io_task_work.func = io_apoll_task_func; + + trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask); + io_req_task_work_add(req); +} + +static inline void io_poll_execute(struct io_kiocb *req, int res) +{ + if (io_poll_get_ownership(req)) + __io_poll_execute(req, res); +} + +static void io_poll_cancel_req(struct io_kiocb *req) +{ + io_poll_mark_cancelled(req); + /* kick tw, which should complete the request */ + io_poll_execute(req, 0); +} + +static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, + void *key) +{ + struct io_kiocb *req = wait->private; + struct io_poll_iocb *poll = container_of(wait, struct io_poll_iocb, + wait); + __poll_t mask = key_to_poll(key); + + if (unlikely(mask & POLLFREE)) { + io_poll_mark_cancelled(req); 
+ /* we have to kick tw in case it's not already */ + io_poll_execute(req, 0); + + /* + * If the waitqueue is being freed early but someone already + * holds ownership over it, we have to tear down the request as + * best we can. That means immediately removing the request from + * its waitqueue and preventing all further accesses to the + * waitqueue via the request. + */ + list_del_init(&poll->wait.entry); + + /* + * Careful: this *must* be the last step, since as soon + * as req->head is NULL'ed out, the request can be + * completed and freed, since aio_poll_complete_work() + * will no longer need to take the waitqueue lock. + */ + smp_store_release(&poll->head, NULL); + return 1; + } + + /* for instances that support it check for an event match first */ + if (mask && !(mask & poll->events)) + return 0; + + if (io_poll_get_ownership(req)) + __io_poll_execute(req, mask); + return 1; +} + static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, struct wait_queue_head *head, struct io_poll_iocb **poll_ptr) @@ -5099,29 +5494,31 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, * if this happens.
*/ if (unlikely(pt->nr_entries)) { - struct io_poll_iocb *poll_one = poll; + struct io_poll_iocb *first = poll; + /* double add on the same waitqueue head, ignore */ + if (first->head == head) + return; /* already have a 2nd entry, fail a third attempt */ if (*poll_ptr) { + if ((*poll_ptr)->head == head) + return; pt->error = -EINVAL; return; } - /* double add on the same waitqueue head, ignore */ - if (poll->head == head) - return; + poll = kmalloc(sizeof(*poll), GFP_ATOMIC); if (!poll) { pt->error = -ENOMEM; return; } - io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake); - refcount_inc(&req->refs); - poll->wait.private = req; + io_init_poll_iocb(poll, first->events, first->wait.func); *poll_ptr = poll; } pt->nr_entries++; poll->head = head; + poll->wait.private = req; if (poll->events & EPOLLEXCLUSIVE) add_wait_queue_exclusive(head, &poll->wait); @@ -5129,6 +5526,73 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, add_wait_queue(head, &poll->wait); } +static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, + struct poll_table_struct *p) +{ + struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); + + __io_queue_proc(&pt->req->poll, pt, head, + (struct io_poll_iocb **) &pt->req->async_data); +} + +static int __io_arm_poll_handler(struct io_kiocb *req, + struct io_poll_iocb *poll, + struct io_poll_table *ipt, __poll_t mask) +{ + struct io_ring_ctx *ctx = req->ctx; + + INIT_HLIST_NODE(&req->hash_node); + io_init_poll_iocb(poll, mask, io_poll_wake); + poll->file = req->file; + poll->wait.private = req; + + ipt->pt._key = mask; + ipt->req = req; + ipt->error = 0; + ipt->nr_entries = 0; + + /* + * Take the ownership to delay any tw execution up until we're done + * with poll arming. see io_poll_get_ownership(). 
+ */ + atomic_set(&req->poll_refs, 1); + mask = vfs_poll(req->file, &ipt->pt) & poll->events; + + if (mask && (poll->events & EPOLLONESHOT)) { + io_poll_remove_entries(req); + /* no one else has access to the req, forget about the ref */ + return mask; + } + if (!mask && unlikely(ipt->error || !ipt->nr_entries)) { + io_poll_remove_entries(req); + if (!ipt->error) + ipt->error = -EINVAL; + return 0; + } + + spin_lock(&ctx->completion_lock); + io_poll_req_insert(req); + spin_unlock(&ctx->completion_lock); + + if (mask) { + /* can't multishot if failed, just queue the event we've got */ + if (unlikely(ipt->error || !ipt->nr_entries)) { + poll->events |= EPOLLONESHOT; + ipt->error = 0; + } + __io_poll_execute(req, mask); + return 0; + } + + /* + * Try to release ownership. If we see a change of state, e.g. + * poll was woken up, queue up a tw, it'll deal with it. + */ + if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1) + __io_poll_execute(req, 0); + return 0; +} + static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, struct poll_table_struct *p) { @@ -5138,240 +5602,87 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); } -static void io_async_task_func(struct callback_head *cb) -{ - struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); - struct async_poll *apoll = req->apoll; - struct io_ring_ctx *ctx = req->ctx; +enum { + IO_APOLL_OK, + IO_APOLL_ABORTED, + IO_APOLL_READY +}; - trace_io_uring_task_run(req->ctx, req->opcode, req->user_data); - - if (io_poll_rewait(req, &apoll->poll)) { - spin_unlock_irq(&ctx->completion_lock); - percpu_ref_put(&ctx->refs); - return; - } - - /* If req is still hashed, it cannot have been canceled. Don't check.
*/ - if (hash_hashed(&req->hash_node)) - hash_del(&req->hash_node); - - io_poll_remove_double(req); - spin_unlock_irq(&ctx->completion_lock); - - if (!READ_ONCE(apoll->poll.canceled)) - __io_req_task_submit(req); - else - __io_req_task_cancel(req, -ECANCELED); - - percpu_ref_put(&ctx->refs); - kfree(apoll->double_poll); - kfree(apoll); -} - -static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync, - void *key) -{ - struct io_kiocb *req = wait->private; - struct io_poll_iocb *poll = &req->apoll->poll; - - trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data, - key_to_poll(key)); - - return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func); -} - -static void io_poll_req_insert(struct io_kiocb *req) -{ - struct io_ring_ctx *ctx = req->ctx; - struct hlist_head *list; - - list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)]; - hlist_add_head(&req->hash_node, list); -} - -static __poll_t __io_arm_poll_handler(struct io_kiocb *req, - struct io_poll_iocb *poll, - struct io_poll_table *ipt, __poll_t mask, - wait_queue_func_t wake_func) - __acquires(&ctx->completion_lock) -{ - struct io_ring_ctx *ctx = req->ctx; - bool cancel = false; - - INIT_HLIST_NODE(&req->hash_node); - io_init_poll_iocb(poll, mask, wake_func); - poll->file = req->file; - poll->wait.private = req; - - ipt->pt._key = mask; - ipt->req = req; - ipt->error = 0; - ipt->nr_entries = 0; - - mask = vfs_poll(req->file, &ipt->pt) & poll->events; - if (unlikely(!ipt->nr_entries) && !ipt->error) - ipt->error = -EINVAL; - - spin_lock_irq(&ctx->completion_lock); - if (ipt->error) - io_poll_remove_double(req); - if (likely(poll->head)) { - spin_lock(&poll->head->lock); - if (unlikely(list_empty(&poll->wait.entry))) { - if (ipt->error) - cancel = true; - ipt->error = 0; - mask = 0; - } - if (mask || ipt->error) - list_del_init(&poll->wait.entry); - else if (cancel) - WRITE_ONCE(poll->canceled, true); - else if (!poll->done) /* actually waiting for an 
event */ - io_poll_req_insert(req); - spin_unlock(&poll->head->lock); - } - - return mask; -} - -static bool io_arm_poll_handler(struct io_kiocb *req) +static int io_arm_poll_handler(struct io_kiocb *req) { const struct io_op_def *def = &io_op_defs[req->opcode]; struct io_ring_ctx *ctx = req->ctx; struct async_poll *apoll; struct io_poll_table ipt; - __poll_t mask, ret; - int rw; + __poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI; + int ret; if (!req->file || !file_can_poll(req->file)) - return false; + return IO_APOLL_ABORTED; if (req->flags & REQ_F_POLLED) - return false; - if (def->pollin) - rw = READ; - else if (def->pollout) - rw = WRITE; - else - return false; - /* if we can't nonblock try, then no point in arming a poll handler */ - if (!io_file_supports_async(req->file, rw)) - return false; + return IO_APOLL_ABORTED; + if (!def->pollin && !def->pollout) + return IO_APOLL_ABORTED; + + if (def->pollin) { + mask |= POLLIN | POLLRDNORM; + + /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */ + if ((req->opcode == IORING_OP_RECVMSG) && + (req->sr_msg.msg_flags & MSG_ERRQUEUE)) + mask &= ~POLLIN; + } else { + mask |= POLLOUT | POLLWRNORM; + } apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); if (unlikely(!apoll)) - return false; + return IO_APOLL_ABORTED; apoll->double_poll = NULL; - - req->flags |= REQ_F_POLLED; req->apoll = apoll; - - mask = 0; - if (def->pollin) - mask |= POLLIN | POLLRDNORM; - if (def->pollout) - mask |= POLLOUT | POLLWRNORM; - - /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */ - if ((req->opcode == IORING_OP_RECVMSG) && - (req->sr_msg.msg_flags & MSG_ERRQUEUE)) - mask &= ~POLLIN; - - mask |= POLLERR | POLLPRI; - + req->flags |= REQ_F_POLLED; ipt.pt._qproc = io_async_queue_proc; - ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, - io_async_wake); - if (ret || ipt.error) { - io_poll_remove_double(req); - spin_unlock_irq(&ctx->completion_lock); - kfree(apoll->double_poll); - kfree(apoll); - return false; - } - 
spin_unlock_irq(&ctx->completion_lock); - trace_io_uring_poll_arm(ctx, req->opcode, req->user_data, mask, - apoll->poll.events); - return true; -} + ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask); + if (ret || ipt.error) + return ret ? IO_APOLL_READY : IO_APOLL_ABORTED; -static bool __io_poll_remove_one(struct io_kiocb *req, - struct io_poll_iocb *poll) -{ - bool do_complete = false; - - spin_lock(&poll->head->lock); - WRITE_ONCE(poll->canceled, true); - if (!list_empty(&poll->wait.entry)) { - list_del_init(&poll->wait.entry); - do_complete = true; - } - spin_unlock(&poll->head->lock); - hash_del(&req->hash_node); - return do_complete; -} - -static bool io_poll_remove_one(struct io_kiocb *req) -{ - bool do_complete; - - io_poll_remove_double(req); - - if (req->opcode == IORING_OP_POLL_ADD) { - do_complete = __io_poll_remove_one(req, &req->poll); - } else { - struct async_poll *apoll = req->apoll; - - /* non-poll requests have submit ref still */ - do_complete = __io_poll_remove_one(req, &apoll->poll); - if (do_complete) { - io_put_req(req); - kfree(apoll->double_poll); - kfree(apoll); - } - } - - if (do_complete) { - io_cqring_fill_event(req, -ECANCELED); - io_commit_cqring(req->ctx); - req_set_fail_links(req); - io_put_req_deferred(req, 1); - } - - return do_complete; + trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data, + mask, apoll->poll.events); + return IO_APOLL_OK; } /* * Returns true if we found and killed one or more poll requests */ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, - struct files_struct *files) + bool cancel_all) { struct hlist_node *tmp; struct io_kiocb *req; - int posted = 0, i; + bool found = false; + int i; - spin_lock_irq(&ctx->completion_lock); + spin_lock(&ctx->completion_lock); for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { struct hlist_head *list; list = &ctx->cancel_hash[i]; hlist_for_each_entry_safe(req, tmp, list, hash_node) { - if (io_match_task(req, tsk, files)) - 
posted += io_poll_remove_one(req); + if (io_match_task_safe(req, tsk, cancel_all)) { + hlist_del_init(&req->hash_node); + io_poll_cancel_req(req); + found = true; + } } } - spin_unlock_irq(&ctx->completion_lock); - - if (posted) - io_cqring_ev_posted(ctx); - - return posted != 0; + spin_unlock(&ctx->completion_lock); + return found; } -static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr) +static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr, + bool poll_only) + __must_hold(&ctx->completion_lock) { struct hlist_head *list; struct io_kiocb *req; @@ -5380,107 +5691,161 @@ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr) hlist_for_each_entry(req, list, hash_node) { if (sqe_addr != req->user_data) continue; - if (io_poll_remove_one(req)) - return 0; - return -EALREADY; + if (poll_only && req->opcode != IORING_OP_POLL_ADD) + continue; + return req; } - - return -ENOENT; + return NULL; } -static int io_poll_remove_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) +static bool io_poll_disarm(struct io_kiocb *req) + __must_hold(&ctx->completion_lock) { - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index || - sqe->poll_events) - return -EINVAL; + if (!io_poll_get_ownership(req)) + return false; + io_poll_remove_entries(req); + hash_del(&req->hash_node); + return true; +} - req->poll.addr = READ_ONCE(sqe->addr); +static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr, + bool poll_only) + __must_hold(&ctx->completion_lock) +{ + struct io_kiocb *req = io_poll_find(ctx, sqe_addr, poll_only); + + if (!req) + return -ENOENT; + io_poll_cancel_req(req); return 0; } -/* - * Find a running poll command that matches one specified in sqe->addr, - * and remove it if found. 
- */ -static int io_poll_remove(struct io_kiocb *req) +static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe, + unsigned int flags) { - struct io_ring_ctx *ctx = req->ctx; - u64 addr; - int ret; - - addr = req->poll.addr; - spin_lock_irq(&ctx->completion_lock); - ret = io_poll_cancel(ctx, addr); - spin_unlock_irq(&ctx->completion_lock); - - if (ret < 0) - req_set_fail_links(req); - io_req_complete(req, ret); - return 0; -} - -static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, - void *key) -{ - struct io_kiocb *req = wait->private; - struct io_poll_iocb *poll = &req->poll; - - return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func); -} - -static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, - struct poll_table_struct *p) -{ - struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); - - __io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->async_data); -} - -static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - struct io_poll_iocb *poll = &req->poll; u32 events; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index) - return -EINVAL; - events = READ_ONCE(sqe->poll32_events); #ifdef __BIG_ENDIAN events = swahw32(events); #endif - poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP | - (events & EPOLLEXCLUSIVE); + if (!(flags & IORING_POLL_ADD_MULTI)) + events |= EPOLLONESHOT; + return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT)); +} + +static int io_poll_update_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_poll_update *upd = &req->poll_update; + u32 flags; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + flags = READ_ONCE(sqe->len); + if (flags & 
~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA | + IORING_POLL_ADD_MULTI)) + return -EINVAL; + /* meaningless without update */ + if (flags == IORING_POLL_ADD_MULTI) + return -EINVAL; + + upd->old_user_data = READ_ONCE(sqe->addr); + upd->update_events = flags & IORING_POLL_UPDATE_EVENTS; + upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA; + + upd->new_user_data = READ_ONCE(sqe->off); + if (!upd->update_user_data && upd->new_user_data) + return -EINVAL; + if (upd->update_events) + upd->events = io_poll_parse_events(sqe, flags); + else if (sqe->poll32_events) + return -EINVAL; + return 0; } -static int io_poll_add(struct io_kiocb *req) +static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_poll_iocb *poll = &req->poll; + u32 flags; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr) + return -EINVAL; + flags = READ_ONCE(sqe->len); + if (flags & ~IORING_POLL_ADD_MULTI) + return -EINVAL; + + io_req_set_refcount(req); + poll->events = io_poll_parse_events(sqe, flags); + return 0; +} + +static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) { struct io_poll_iocb *poll = &req->poll; - struct io_ring_ctx *ctx = req->ctx; struct io_poll_table ipt; - __poll_t mask; + int ret; ipt.pt._qproc = io_poll_queue_proc; - mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events, - io_poll_wake); + ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events); + if (!ret && ipt.error) + req_set_fail(req); + ret = ret ?: ipt.error; + if (ret) + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} - if (mask) { /* no async, we'd stolen it */ - ipt.error = 0; - io_poll_complete(req, mask, 0); - } - spin_unlock_irq(&ctx->completion_lock); +static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_kiocb *preq; + int ret2, ret = 0; - if 
(mask) { - io_cqring_ev_posted(ctx); - io_put_req(req); + spin_lock(&ctx->completion_lock); + preq = io_poll_find(ctx, req->poll_update.old_user_data, true); + if (!preq || !io_poll_disarm(preq)) { + spin_unlock(&ctx->completion_lock); + ret = preq ? -EALREADY : -ENOENT; + goto out; } - return ipt.error; + spin_unlock(&ctx->completion_lock); + + if (req->poll_update.update_events || req->poll_update.update_user_data) { + /* only mask one event flags, keep behavior flags */ + if (req->poll_update.update_events) { + preq->poll.events &= ~0xffff; + preq->poll.events |= req->poll_update.events & 0xffff; + preq->poll.events |= IO_POLL_UNMASK; + } + if (req->poll_update.update_user_data) + preq->user_data = req->poll_update.new_user_data; + + ret2 = io_poll_add(preq, issue_flags); + /* successfully updated, don't complete poll request */ + if (!ret2) + goto out; + } + req_set_fail(preq); + io_req_complete(preq, -ECANCELED); +out: + if (ret < 0) + req_set_fail(req); + /* complete update request, we're done with it */ + io_req_complete(req, ret); + return 0; +} + +static void io_req_task_timeout(struct io_kiocb *req, bool *locked) +{ + req_set_fail(req); + io_req_complete_post(req, -ETIME, 0); } static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) @@ -5491,88 +5856,182 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) struct io_ring_ctx *ctx = req->ctx; unsigned long flags; - spin_lock_irqsave(&ctx->completion_lock, flags); + spin_lock_irqsave(&ctx->timeout_lock, flags); list_del_init(&req->timeout.list); atomic_set(&req->ctx->cq_timeouts, atomic_read(&req->ctx->cq_timeouts) + 1); + spin_unlock_irqrestore(&ctx->timeout_lock, flags); - io_cqring_fill_event(req, -ETIME); - io_commit_cqring(ctx); - spin_unlock_irqrestore(&ctx->completion_lock, flags); - - io_cqring_ev_posted(ctx); - req_set_fail_links(req); - io_put_req(req); + req->io_task_work.func = io_req_task_timeout; + io_req_task_work_add(req); return HRTIMER_NORESTART; } -static int 
__io_timeout_cancel(struct io_kiocb *req) +static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx, + __u64 user_data) + __must_hold(&ctx->timeout_lock) { - struct io_timeout_data *io = req->async_data; - int ret; + struct io_timeout_data *io; + struct io_kiocb *req; + bool found = false; - ret = hrtimer_try_to_cancel(&io->timer); - if (ret == -1) - return -EALREADY; + list_for_each_entry(req, &ctx->timeout_list, timeout.list) { + found = user_data == req->user_data; + if (found) + break; + } + if (!found) + return ERR_PTR(-ENOENT); + + io = req->async_data; + if (hrtimer_try_to_cancel(&io->timer) == -1) + return ERR_PTR(-EALREADY); list_del_init(&req->timeout.list); - - req_set_fail_links(req); - io_cqring_fill_event(req, -ECANCELED); - io_put_req_deferred(req, 1); - return 0; + return req; } static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) + __must_hold(&ctx->completion_lock) + __must_hold(&ctx->timeout_lock) { - struct io_kiocb *req; - int ret = -ENOENT; + struct io_kiocb *req = io_timeout_extract(ctx, user_data); - list_for_each_entry(req, &ctx->timeout_list, timeout.list) { - if (user_data == req->user_data) { - ret = 0; - break; - } + if (IS_ERR(req)) + return PTR_ERR(req); + + req_set_fail(req); + io_fill_cqe_req(req, -ECANCELED, 0); + io_put_req_deferred(req); + return 0; +} + +static clockid_t io_timeout_get_clock(struct io_timeout_data *data) +{ + switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { + case IORING_TIMEOUT_BOOTTIME: + return CLOCK_BOOTTIME; + case IORING_TIMEOUT_REALTIME: + return CLOCK_REALTIME; + default: + /* can't happen, vetted at prep time */ + WARN_ON_ONCE(1); + fallthrough; + case 0: + return CLOCK_MONOTONIC; } +} - if (ret == -ENOENT) - return ret; +static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, + struct timespec64 *ts, enum hrtimer_mode mode) + __must_hold(&ctx->timeout_lock) +{ + struct io_timeout_data *io; + struct io_kiocb *req; + bool found = false; - return 
__io_timeout_cancel(req); + list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) { + found = user_data == req->user_data; + if (found) + break; + } + if (!found) + return -ENOENT; + + io = req->async_data; + if (hrtimer_try_to_cancel(&io->timer) == -1) + return -EALREADY; + hrtimer_init(&io->timer, io_timeout_get_clock(io), mode); + io->timer.function = io_link_timeout_fn; + hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); + return 0; +} + +static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, + struct timespec64 *ts, enum hrtimer_mode mode) + __must_hold(&ctx->timeout_lock) +{ + struct io_kiocb *req = io_timeout_extract(ctx, user_data); + struct io_timeout_data *data; + + if (IS_ERR(req)) + return PTR_ERR(req); + + req->timeout.off = 0; /* noseq */ + data = req->async_data; + list_add_tail(&req->timeout.list, &ctx->timeout_list); + hrtimer_init(&data->timer, io_timeout_get_clock(data), mode); + data->timer.function = io_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode); + return 0; } static int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { + struct io_timeout_rem *tr = &req->timeout_rem; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; - if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags || - sqe->splice_fd_in) + if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in) return -EINVAL; - req->timeout_rem.addr = READ_ONCE(sqe->addr); + tr->ltimeout = false; + tr->addr = READ_ONCE(sqe->addr); + tr->flags = READ_ONCE(sqe->timeout_flags); + if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) { + if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1) + return -EINVAL; + if (tr->flags & IORING_LINK_TIMEOUT_UPDATE) + tr->ltimeout = true; + if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) + return -EINVAL; + if (get_timespec64(&tr->ts, 
u64_to_user_ptr(sqe->addr2))) + return -EFAULT; + } else if (tr->flags) { + /* timeout removal doesn't support flags */ + return -EINVAL; + } + return 0; } +static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags) +{ + return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS + : HRTIMER_MODE_REL; +} + /* * Remove or update an existing timeout command */ -static int io_timeout_remove(struct io_kiocb *req) +static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) { + struct io_timeout_rem *tr = &req->timeout_rem; struct io_ring_ctx *ctx = req->ctx; int ret; - spin_lock_irq(&ctx->completion_lock); - ret = io_timeout_cancel(ctx, req->timeout_rem.addr); + if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) { + spin_lock(&ctx->completion_lock); + spin_lock_irq(&ctx->timeout_lock); + ret = io_timeout_cancel(ctx, tr->addr); + spin_unlock_irq(&ctx->timeout_lock); + spin_unlock(&ctx->completion_lock); + } else { + enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags); + + spin_lock_irq(&ctx->timeout_lock); + if (tr->ltimeout) + ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); + else + ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); + spin_unlock_irq(&ctx->timeout_lock); + } - io_cqring_fill_event(req, ret); - io_commit_cqring(ctx); - spin_unlock_irq(&ctx->completion_lock); - io_cqring_ev_posted(ctx); if (ret < 0) - req_set_fail_links(req); - io_put_req(req); + req_set_fail(req); + io_req_complete_post(req, ret, 0); return 0; } @@ -5591,38 +6050,52 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (off && is_timeout_link) return -EINVAL; flags = READ_ONCE(sqe->timeout_flags); - if (flags & ~IORING_TIMEOUT_ABS) + if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK)) + return -EINVAL; + /* more than one clock specified is invalid, obviously */ + if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) return -EINVAL; + INIT_LIST_HEAD(&req->timeout.list); 
req->timeout.off = off; + if (unlikely(off && !req->ctx->off_timeout_used)) + req->ctx->off_timeout_used = true; if (!req->async_data && io_alloc_async_data(req)) return -ENOMEM; data = req->async_data; data->req = req; + data->flags = flags; if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) return -EFAULT; - if (flags & IORING_TIMEOUT_ABS) - data->mode = HRTIMER_MODE_ABS; - else - data->mode = HRTIMER_MODE_REL; - INIT_LIST_HEAD(&req->timeout.list); - hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode); + data->mode = io_translate_timeout_mode(flags); + hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); + + if (is_timeout_link) { + struct io_submit_link *link = &req->ctx->submit_state.link; + + if (!link->head) + return -EINVAL; + if (link->last->opcode == IORING_OP_LINK_TIMEOUT) + return -EINVAL; + req->timeout.head = link->last; + link->last->flags |= REQ_F_ARM_LTIMEOUT; + } return 0; } -static int io_timeout(struct io_kiocb *req) +static int io_timeout(struct io_kiocb *req, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; struct io_timeout_data *data = req->async_data; struct list_head *entry; u32 tail, off = req->timeout.off; - spin_lock_irq(&ctx->completion_lock); + spin_lock_irq(&ctx->timeout_lock); /* * sqe->off holds how many events that need to occur for this @@ -5661,23 +6134,34 @@ add: list_add(&req->timeout.list, entry); data->timer.function = io_timeout_fn; hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); - spin_unlock_irq(&ctx->completion_lock); + spin_unlock_irq(&ctx->timeout_lock); return 0; } +struct io_cancel_data { + struct io_ring_ctx *ctx; + u64 user_data; +}; + static bool io_cancel_cb(struct io_wq_work *work, void *data) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); + struct io_cancel_data *cd = data; - return req->user_data == (unsigned long) data; + return req->ctx == cd->ctx && req->user_data == cd->user_data; } -static int io_async_cancel_one(struct 
io_ring_ctx *ctx, void *sqe_addr) +static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data, + struct io_ring_ctx *ctx) { + struct io_cancel_data data = { .ctx = ctx, .user_data = user_data, }; enum io_wq_cancel cancel_ret; int ret = 0; - cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr, false); + if (!tctx || !tctx->io_wq) + return -ENOENT; + + cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, &data, false); switch (cancel_ret) { case IO_WQ_CANCEL_OK: ret = 0; @@ -5693,35 +6177,27 @@ static int io_async_cancel_one(struct io_ring_ctx *ctx, void *sqe_addr) return ret; } -static void io_async_find_and_cancel(struct io_ring_ctx *ctx, - struct io_kiocb *req, __u64 sqe_addr, - int success_ret) +static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr) { - unsigned long flags; + struct io_ring_ctx *ctx = req->ctx; int ret; - ret = io_async_cancel_one(ctx, (void *) (unsigned long) sqe_addr); - if (ret != -ENOENT) { - spin_lock_irqsave(&ctx->completion_lock, flags); - goto done; - } + WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current); - spin_lock_irqsave(&ctx->completion_lock, flags); - ret = io_timeout_cancel(ctx, sqe_addr); + ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx); if (ret != -ENOENT) - goto done; - ret = io_poll_cancel(ctx, sqe_addr); -done: - if (!ret) - ret = success_ret; - io_cqring_fill_event(req, ret); - io_commit_cqring(ctx); - spin_unlock_irqrestore(&ctx->completion_lock, flags); - io_cqring_ev_posted(ctx); + return ret; - if (ret < 0) - req_set_fail_links(req); - io_put_req(req); + spin_lock(&ctx->completion_lock); + spin_lock_irq(&ctx->timeout_lock); + ret = io_timeout_cancel(ctx, sqe_addr); + spin_unlock_irq(&ctx->timeout_lock); + if (ret != -ENOENT) + goto out; + ret = io_poll_cancel(ctx, sqe_addr, false); +out: + spin_unlock(&ctx->completion_lock); + return ret; } static int io_async_cancel_prep(struct io_kiocb *req, @@ -5739,52 +6215,72 @@ static int 
io_async_cancel_prep(struct io_kiocb *req, return 0; } -static int io_async_cancel(struct io_kiocb *req) +static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; - - io_async_find_and_cancel(ctx, req, req->cancel.addr, 0); - return 0; -} - -static int io_files_update_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL)) - return -EINVAL; - if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) - return -EINVAL; - if (sqe->ioprio || sqe->rw_flags) - return -EINVAL; - - req->files_update.offset = READ_ONCE(sqe->off); - req->files_update.nr_args = READ_ONCE(sqe->len); - if (!req->files_update.nr_args) - return -EINVAL; - req->files_update.arg = READ_ONCE(sqe->addr); - return 0; -} - -static int io_files_update(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_uring_files_update up; + u64 sqe_addr = req->cancel.addr; + struct io_tctx_node *node; int ret; - if (force_nonblock) - return -EAGAIN; + ret = io_try_cancel_userdata(req, sqe_addr); + if (ret != -ENOENT) + goto done; - up.offset = req->files_update.offset; - up.fds = req->files_update.arg; + /* slow path, try all io-wq's */ + io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); + ret = -ENOENT; + list_for_each_entry(node, &ctx->tctx_list, ctx_node) { + struct io_uring_task *tctx = node->task->io_uring; - mutex_lock(&ctx->uring_lock); - ret = __io_sqe_files_update(ctx, &up, req->files_update.nr_args); - mutex_unlock(&ctx->uring_lock); + ret = io_async_cancel_one(tctx, req->cancel.addr, ctx); + if (ret != -ENOENT) + break; + } + io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); +done: + if (ret < 0) + req_set_fail(req); + io_req_complete_post(req, ret, 0); + return 0; +} + +static int io_rsrc_update_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + if 
(unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; + if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in) + return -EINVAL; + + req->rsrc_update.offset = READ_ONCE(sqe->off); + req->rsrc_update.nr_args = READ_ONCE(sqe->len); + if (!req->rsrc_update.nr_args) + return -EINVAL; + req->rsrc_update.arg = READ_ONCE(sqe->addr); + return 0; +} + +static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_uring_rsrc_update2 up; + int ret; + + up.offset = req->rsrc_update.offset; + up.data = req->rsrc_update.arg; + up.nr = 0; + up.tags = 0; + up.resv = 0; + up.resv2 = 0; + + io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); + ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, + &up, req->rsrc_update.nr_args); + io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); if (ret < 0) - req_set_fail_links(req); - __io_req_complete(req, ret, 0, cs); + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); return 0; } @@ -5804,11 +6300,11 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) case IORING_OP_POLL_ADD: return io_poll_add_prep(req, sqe); case IORING_OP_POLL_REMOVE: - return io_poll_remove_prep(req, sqe); + return io_poll_update_prep(req, sqe); case IORING_OP_FSYNC: - return io_prep_fsync(req, sqe); + return io_fsync_prep(req, sqe); case IORING_OP_SYNC_FILE_RANGE: - return io_prep_sfr(req, sqe); + return io_sfr_prep(req, sqe); case IORING_OP_SENDMSG: case IORING_OP_SEND: return io_sendmsg_prep(req, sqe); @@ -5834,7 +6330,7 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) case IORING_OP_CLOSE: return io_close_prep(req, sqe); case IORING_OP_FILES_UPDATE: - return io_files_update_prep(req, sqe); + return io_rsrc_update_prep(req, sqe); case IORING_OP_STATX: return io_statx_prep(req, sqe); case IORING_OP_FADVISE: @@ -5853,100 +6349,131 @@ static int io_req_prep(struct io_kiocb 
*req, const struct io_uring_sqe *sqe) return io_remove_buffers_prep(req, sqe); case IORING_OP_TEE: return io_tee_prep(req, sqe); + case IORING_OP_SHUTDOWN: + return io_shutdown_prep(req, sqe); + case IORING_OP_RENAMEAT: + return io_renameat_prep(req, sqe); + case IORING_OP_UNLINKAT: + return io_unlinkat_prep(req, sqe); } printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", req->opcode); - return-EINVAL; + return -EINVAL; } -static int io_req_defer_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) +static int io_req_prep_async(struct io_kiocb *req) { - if (!sqe) + if (!io_op_defs[req->opcode].needs_async_setup) return 0; + if (WARN_ON_ONCE(req->async_data)) + return -EFAULT; if (io_alloc_async_data(req)) return -EAGAIN; - return io_req_prep(req, sqe); + + switch (req->opcode) { + case IORING_OP_READV: + return io_rw_prep_async(req, READ); + case IORING_OP_WRITEV: + return io_rw_prep_async(req, WRITE); + case IORING_OP_SENDMSG: + return io_sendmsg_prep_async(req); + case IORING_OP_RECVMSG: + return io_recvmsg_prep_async(req); + case IORING_OP_CONNECT: + return io_connect_prep_async(req); + } + printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n", + req->opcode); + return -EFAULT; } static u32 io_get_sequence(struct io_kiocb *req) { - struct io_kiocb *pos; - struct io_ring_ctx *ctx = req->ctx; - u32 total_submitted, nr_reqs = 1; + u32 seq = req->ctx->cached_sq_head; - if (req->flags & REQ_F_LINK_HEAD) - list_for_each_entry(pos, &req->link_list, link_list) - nr_reqs++; - - total_submitted = ctx->cached_sq_head - ctx->cached_sq_dropped; - return total_submitted - nr_reqs; + /* need original cached_sq_head, but it was increased for each req */ + io_for_each_link(req, req) + seq--; + return seq; } -static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe) +static bool io_drain_req(struct io_kiocb *req) { + struct io_kiocb *pos; struct io_ring_ctx *ctx = req->ctx; struct io_defer_entry *de; int ret; u32 seq; + if 
(req->flags & REQ_F_FAIL) { + io_req_complete_fail_submit(req); + return true; + } + + /* + * If we need to drain a request in the middle of a link, drain the + * head request and the next request/link after the current link. + * Considering sequential execution of links, IOSQE_IO_DRAIN will be + * maintained for every request of our link. + */ + if (ctx->drain_next) { + req->flags |= REQ_F_IO_DRAIN; + ctx->drain_next = false; + } + /* not interested in head, start from the first linked */ + io_for_each_link(pos, req->link) { + if (pos->flags & REQ_F_IO_DRAIN) { + ctx->drain_next = true; + req->flags |= REQ_F_IO_DRAIN; + break; + } + } + /* Still need defer if there is pending req in defer list. */ + spin_lock(&ctx->completion_lock); if (likely(list_empty_careful(&ctx->defer_list) && - !(req->flags & REQ_F_IO_DRAIN))) - return 0; + !(req->flags & REQ_F_IO_DRAIN))) { + spin_unlock(&ctx->completion_lock); + ctx->drain_active = false; + return false; + } + spin_unlock(&ctx->completion_lock); seq = io_get_sequence(req); /* Still a chance to pass the sequence check */ if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list)) - return 0; + return false; - if (!req->async_data) { - ret = io_req_defer_prep(req, sqe); - if (ret) - return ret; - } + ret = io_req_prep_async(req); + if (ret) + goto fail; io_prep_async_link(req); de = kmalloc(sizeof(*de), GFP_KERNEL); - if (!de) - return -ENOMEM; + if (!de) { + ret = -ENOMEM; +fail: + io_req_complete_failed(req, ret); + return true; + } - spin_lock_irq(&ctx->completion_lock); + spin_lock(&ctx->completion_lock); if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) { - spin_unlock_irq(&ctx->completion_lock); + spin_unlock(&ctx->completion_lock); kfree(de); - io_queue_async_work(req); - return -EIOCBQUEUED; + io_queue_async_work(req, NULL); + return true; } trace_io_uring_defer(ctx, req, req->user_data); de->req = req; de->seq = seq; list_add_tail(&de->list, &ctx->defer_list); - 
spin_unlock_irq(&ctx->completion_lock); - return -EIOCBQUEUED; + spin_unlock(&ctx->completion_lock); + return true; } -static void io_req_drop_files(struct io_kiocb *req) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_uring_task *tctx = req->task->io_uring; - unsigned long flags; - - if (req->work.flags & IO_WQ_WORK_FILES) { - put_files_struct(req->work.identity->files); - put_nsproxy(req->work.identity->nsproxy); - } - spin_lock_irqsave(&ctx->inflight_lock, flags); - list_del(&req->inflight_entry); - spin_unlock_irqrestore(&ctx->inflight_lock, flags); - req->flags &= ~REQ_F_INFLIGHT; - req->work.flags &= ~IO_WQ_WORK_FILES; - if (atomic_read(&tctx->in_idle)) - wake_up(&tctx->wait); -} - -static void __io_clean_op(struct io_kiocb *req) +static void io_clean_op(struct io_kiocb *req) { if (req->flags & REQ_F_BUFFER_SELECTED) { switch (req->opcode) { @@ -5960,7 +6487,6 @@ static void __io_clean_op(struct io_kiocb *req) kfree(req->sr_msg.kbuf); break; } - req->flags &= ~REQ_F_BUFFER_SELECTED; } if (req->flags & REQ_F_NEED_CLEANUP) { @@ -5972,588 +6498,423 @@ static void __io_clean_op(struct io_kiocb *req) case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE: { struct io_async_rw *io = req->async_data; - if (io->free_iovec) - kfree(io->free_iovec); + + kfree(io->free_iovec); break; } case IORING_OP_RECVMSG: case IORING_OP_SENDMSG: { struct io_async_msghdr *io = req->async_data; - if (io->iov != io->fast_iov) - kfree(io->iov); + + kfree(io->free_iov); break; } - case IORING_OP_SPLICE: - case IORING_OP_TEE: - io_put_file(req, req->splice.file_in, - (req->splice.flags & SPLICE_F_FD_IN_FIXED)); - break; case IORING_OP_OPENAT: case IORING_OP_OPENAT2: if (req->open.filename) putname(req->open.filename); break; + case IORING_OP_RENAMEAT: + putname(req->rename.oldpath); + putname(req->rename.newpath); + break; + case IORING_OP_UNLINKAT: + putname(req->unlink.filename); + break; } - req->flags &= ~REQ_F_NEED_CLEANUP; } + if ((req->flags & REQ_F_POLLED) && req->apoll) { + 
kfree(req->apoll->double_poll); + kfree(req->apoll); + req->apoll = NULL; + } + if (req->flags & REQ_F_INFLIGHT) { + struct io_uring_task *tctx = req->task->io_uring; + + atomic_dec(&tctx->inflight_tracked); + } + if (req->flags & REQ_F_CREDS) + put_cred(req->creds); + + req->flags &= ~IO_REQ_CLEAN_FLAGS; } -static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock, - struct io_comp_state *cs) +static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; + const struct cred *creds = NULL; int ret; + if ((req->flags & REQ_F_CREDS) && req->creds != current_cred()) + creds = override_creds(req->creds); + switch (req->opcode) { case IORING_OP_NOP: - ret = io_nop(req, cs); + ret = io_nop(req, issue_flags); break; case IORING_OP_READV: case IORING_OP_READ_FIXED: case IORING_OP_READ: - ret = io_read(req, force_nonblock, cs); + ret = io_read(req, issue_flags); break; case IORING_OP_WRITEV: case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE: - ret = io_write(req, force_nonblock, cs); + ret = io_write(req, issue_flags); break; case IORING_OP_FSYNC: - ret = io_fsync(req, force_nonblock); + ret = io_fsync(req, issue_flags); break; case IORING_OP_POLL_ADD: - ret = io_poll_add(req); + ret = io_poll_add(req, issue_flags); break; case IORING_OP_POLL_REMOVE: - ret = io_poll_remove(req); + ret = io_poll_update(req, issue_flags); break; case IORING_OP_SYNC_FILE_RANGE: - ret = io_sync_file_range(req, force_nonblock); + ret = io_sync_file_range(req, issue_flags); break; case IORING_OP_SENDMSG: - ret = io_sendmsg(req, force_nonblock, cs); + ret = io_sendmsg(req, issue_flags); break; case IORING_OP_SEND: - ret = io_send(req, force_nonblock, cs); + ret = io_send(req, issue_flags); break; case IORING_OP_RECVMSG: - ret = io_recvmsg(req, force_nonblock, cs); + ret = io_recvmsg(req, issue_flags); break; case IORING_OP_RECV: - ret = io_recv(req, force_nonblock, cs); + ret = io_recv(req, issue_flags); break; case IORING_OP_TIMEOUT: - ret = 
io_timeout(req); + ret = io_timeout(req, issue_flags); break; case IORING_OP_TIMEOUT_REMOVE: - ret = io_timeout_remove(req); + ret = io_timeout_remove(req, issue_flags); break; case IORING_OP_ACCEPT: - ret = io_accept(req, force_nonblock, cs); + ret = io_accept(req, issue_flags); break; case IORING_OP_CONNECT: - ret = io_connect(req, force_nonblock, cs); + ret = io_connect(req, issue_flags); break; case IORING_OP_ASYNC_CANCEL: - ret = io_async_cancel(req); + ret = io_async_cancel(req, issue_flags); break; case IORING_OP_FALLOCATE: - ret = io_fallocate(req, force_nonblock); + ret = io_fallocate(req, issue_flags); break; case IORING_OP_OPENAT: - ret = io_openat(req, force_nonblock); + ret = io_openat(req, issue_flags); break; case IORING_OP_CLOSE: - ret = io_close(req, force_nonblock, cs); + ret = io_close(req, issue_flags); break; case IORING_OP_FILES_UPDATE: - ret = io_files_update(req, force_nonblock, cs); + ret = io_files_update(req, issue_flags); break; case IORING_OP_STATX: - ret = io_statx(req, force_nonblock); + ret = io_statx(req, issue_flags); break; case IORING_OP_FADVISE: - ret = io_fadvise(req, force_nonblock); + ret = io_fadvise(req, issue_flags); break; case IORING_OP_MADVISE: - ret = io_madvise(req, force_nonblock); + ret = io_madvise(req, issue_flags); break; case IORING_OP_OPENAT2: - ret = io_openat2(req, force_nonblock); + ret = io_openat2(req, issue_flags); break; case IORING_OP_EPOLL_CTL: - ret = io_epoll_ctl(req, force_nonblock, cs); + ret = io_epoll_ctl(req, issue_flags); break; case IORING_OP_SPLICE: - ret = io_splice(req, force_nonblock); + ret = io_splice(req, issue_flags); break; case IORING_OP_PROVIDE_BUFFERS: - ret = io_provide_buffers(req, force_nonblock, cs); + ret = io_provide_buffers(req, issue_flags); break; case IORING_OP_REMOVE_BUFFERS: - ret = io_remove_buffers(req, force_nonblock, cs); + ret = io_remove_buffers(req, issue_flags); break; case IORING_OP_TEE: - ret = io_tee(req, force_nonblock); + ret = io_tee(req, issue_flags); + 
break; + case IORING_OP_SHUTDOWN: + ret = io_shutdown(req, issue_flags); + break; + case IORING_OP_RENAMEAT: + ret = io_renameat(req, issue_flags); + break; + case IORING_OP_UNLINKAT: + ret = io_unlinkat(req, issue_flags); break; default: ret = -EINVAL; break; } + if (creds) + revert_creds(creds); if (ret) return ret; - /* If the op doesn't have a file, we're not polling for it */ - if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) { - const bool in_async = io_wq_current_is_worker(); - - /* workqueue context doesn't hold uring_lock, grab it now */ - if (in_async) - mutex_lock(&ctx->uring_lock); - + if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) io_iopoll_req_issued(req); - if (in_async) - mutex_unlock(&ctx->uring_lock); - } - return 0; } -static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work) +static struct io_wq_work *io_wq_free_work(struct io_wq_work *work) +{ + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + + req = io_put_req_find_next(req); + return req ? &req->work : NULL; +} + +static void io_wq_submit_work(struct io_wq_work *work) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); struct io_kiocb *timeout; int ret = 0; + /* one will be dropped by ->io_free_work() after returning to io-wq */ + if (!(req->flags & REQ_F_REFCOUNT)) + __io_req_set_refcount(req, 2); + else + req_ref_get(req); + timeout = io_prep_linked_timeout(req); if (timeout) io_queue_linked_timeout(timeout); - /* if NO_CANCEL is set, we must still run the work */ - if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) == - IO_WQ_WORK_CANCEL) { + /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ + if (work->flags & IO_WQ_WORK_CANCEL) ret = -ECANCELED; - } if (!ret) { do { - ret = io_issue_sqe(req, false, NULL); + ret = io_issue_sqe(req, 0); /* * We can get EAGAIN for polled IO even though we're * forcing a sync submission from here, since we can't * wait for request slots on the block side. 
*/ - if (ret != -EAGAIN) + if (ret != -EAGAIN || !(req->ctx->flags & IORING_SETUP_IOPOLL)) break; cond_resched(); } while (1); } - if (ret) { - struct io_ring_ctx *lock_ctx = NULL; + /* avoid locking problems by failing it from a clean context */ + if (ret) + io_req_task_queue_fail(req, ret); +} - if (req->ctx->flags & IORING_SETUP_IOPOLL) - lock_ctx = req->ctx; - - /* - * io_iopoll_complete() does not hold completion_lock to - * complete polled io, so here for polled io, we can not call - * io_req_complete() directly, otherwise there maybe concurrent - * access to cqring, defer_list, etc, which is not safe. Given - * that io_iopoll_complete() is always called under uring_lock, - * so here for polled io, we also get uring_lock to complete - * it. - */ - if (lock_ctx) - mutex_lock(&lock_ctx->uring_lock); - - req_set_fail_links(req); - io_req_complete(req, ret); - - if (lock_ctx) - mutex_unlock(&lock_ctx->uring_lock); - } - - return io_steal_work(req); +static inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table, + unsigned i) +{ + return &table->files[i]; } static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, int index) { - struct fixed_file_table *table; + struct io_fixed_file *slot = io_fixed_file_slot(&ctx->file_table, index); - table = &ctx->file_data->table[index >> IORING_FILE_TABLE_SHIFT]; - return table->files[index & IORING_FILE_TABLE_MASK]; + return (struct file *) (slot->file_ptr & FFS_MASK); } -static struct file *io_file_get(struct io_submit_state *state, - struct io_kiocb *req, int fd, bool fixed) +static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file) +{ + unsigned long file_ptr = (unsigned long) file; + + if (__io_file_supports_nowait(file, READ)) + file_ptr |= FFS_ASYNC_READ; + if (__io_file_supports_nowait(file, WRITE)) + file_ptr |= FFS_ASYNC_WRITE; + if (S_ISREG(file_inode(file)->i_mode)) + file_ptr |= FFS_ISREG; + file_slot->file_ptr = file_ptr; +} + +static inline struct file 
*io_file_get_fixed(struct io_ring_ctx *ctx, + struct io_kiocb *req, int fd) { - struct io_ring_ctx *ctx = req->ctx; struct file *file; + unsigned long file_ptr; - if (fixed) { - if (unlikely((unsigned int)fd >= ctx->nr_user_files)) - return NULL; - fd = array_index_nospec(fd, ctx->nr_user_files); - file = io_file_from_index(ctx, fd); - if (file) { - req->fixed_file_refs = &ctx->file_data->node->refs; - percpu_ref_get(req->fixed_file_refs); - } - } else { - trace_io_uring_file_get(ctx, fd); - file = __io_file_get(state, fd); - } - - if (file && file->f_op == &io_uring_fops && - !(req->flags & REQ_F_INFLIGHT)) { - io_req_init_async(req); - req->flags |= REQ_F_INFLIGHT; - - spin_lock_irq(&ctx->inflight_lock); - list_add(&req->inflight_entry, &ctx->inflight_list); - spin_unlock_irq(&ctx->inflight_lock); - } - + if (unlikely((unsigned int)fd >= ctx->nr_user_files)) + return NULL; + fd = array_index_nospec(fd, ctx->nr_user_files); + file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; + file = (struct file *) (file_ptr & FFS_MASK); + file_ptr &= ~FFS_MASK; + /* mask in overlapping REQ_F and FFS bits */ + req->flags |= (file_ptr << REQ_F_NOWAIT_READ_BIT); + io_req_set_rsrc_node(req); return file; } -static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, - int fd) +static struct file *io_file_get_normal(struct io_ring_ctx *ctx, + struct io_kiocb *req, int fd) { - bool fixed; + struct file *file = fget(fd); - fixed = (req->flags & REQ_F_FIXED_FILE) != 0; - if (unlikely(!fixed && io_async_submit(req->ctx))) - return -EBADF; + trace_io_uring_file_get(ctx, fd); - req->file = io_file_get(state, req, fd, fixed); - if (req->file || io_op_defs[req->opcode].needs_file_no_error) - return 0; - return -EBADF; + /* we don't allow fixed io_uring files */ + if (file && unlikely(file->f_op == &io_uring_fops)) + io_req_track_inflight(req); + return file; +} + +static inline struct file *io_file_get(struct io_ring_ctx *ctx, + struct io_kiocb *req, int fd, 
bool fixed) +{ + if (fixed) + return io_file_get_fixed(ctx, req, fd); + else + return io_file_get_normal(ctx, req, fd); +} + +static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) +{ + struct io_kiocb *prev = req->timeout.prev; + int ret = -ENOENT; + + if (prev) { + if (!(req->task->flags & PF_EXITING)) + ret = io_try_cancel_userdata(req, prev->user_data); + io_req_complete_post(req, ret ?: -ETIME, 0); + io_put_req(prev); + } else { + io_req_complete_post(req, -ETIME, 0); + } } static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) { struct io_timeout_data *data = container_of(timer, struct io_timeout_data, timer); - struct io_kiocb *req = data->req; + struct io_kiocb *prev, *req = data->req; struct io_ring_ctx *ctx = req->ctx; - struct io_kiocb *prev = NULL; unsigned long flags; - spin_lock_irqsave(&ctx->completion_lock, flags); + spin_lock_irqsave(&ctx->timeout_lock, flags); + prev = req->timeout.head; + req->timeout.head = NULL; /* * We don't expect the list to be empty, that will only happen if we * race with the completion of the linked work. 
*/ - if (!list_empty(&req->link_list)) { - prev = list_entry(req->link_list.prev, struct io_kiocb, - link_list); - list_del_init(&req->link_list); - if (!refcount_inc_not_zero(&prev->refs)) + if (prev) { + io_remove_next_linked(prev); + if (!req_ref_inc_not_zero(prev)) prev = NULL; } - list_del(&req->timeout.list); - spin_unlock_irqrestore(&ctx->completion_lock, flags); + req->timeout.prev = prev; + spin_unlock_irqrestore(&ctx->timeout_lock, flags); - if (prev) { - io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME); - io_put_req_deferred(prev, 1); - } else { - io_cqring_add_event(req, -ETIME, 0); - io_put_req_deferred(req, 1); - } + req->io_task_work.func = io_req_task_link_timeout; + io_req_task_work_add(req); return HRTIMER_NORESTART; } -static void __io_queue_linked_timeout(struct io_kiocb *req) -{ - /* - * If the list is now empty, then our linked request finished before - * we got a chance to setup the timer - */ - if (!list_empty(&req->link_list)) { - struct io_timeout_data *data = req->async_data; - - data->timer.function = io_link_timeout_fn; - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), - data->mode); - } -} - static void io_queue_linked_timeout(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - spin_lock_irq(&ctx->completion_lock); - __io_queue_linked_timeout(req); - spin_unlock_irq(&ctx->completion_lock); + spin_lock_irq(&ctx->timeout_lock); + /* + * If the back reference is NULL, then our linked request finished + * before we got a chance to setup the timer + */ + if (req->timeout.head) { + struct io_timeout_data *data = req->async_data; + data->timer.function = io_link_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), + data->mode); + list_add_tail(&req->timeout.list, &ctx->ltimeout_list); + } + spin_unlock_irq(&ctx->timeout_lock); /* drop submission reference */ io_put_req(req); } -static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) -{ - struct io_kiocb *nxt; - - if 
(!(req->flags & REQ_F_LINK_HEAD)) - return NULL; - if (req->flags & REQ_F_LINK_TIMEOUT) - return NULL; - - nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, - link_list); - if (!nxt || nxt->opcode != IORING_OP_LINK_TIMEOUT) - return NULL; - - nxt->flags |= REQ_F_LTIMEOUT_ACTIVE; - req->flags |= REQ_F_LINK_TIMEOUT; - return nxt; -} - -static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs) +static void __io_queue_sqe(struct io_kiocb *req) + __must_hold(&req->ctx->uring_lock) { struct io_kiocb *linked_timeout; - const struct cred *old_creds = NULL; int ret; -again: - linked_timeout = io_prep_linked_timeout(req); - - if ((req->flags & REQ_F_WORK_INITIALIZED) && - (req->work.flags & IO_WQ_WORK_CREDS) && - req->work.identity->creds != current_cred()) { - if (old_creds) - revert_creds(old_creds); - if (old_creds == req->work.identity->creds) - old_creds = NULL; /* restored original creds */ - else - old_creds = override_creds(req->work.identity->creds); - } - - ret = io_issue_sqe(req, true, cs); +issue_sqe: + ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER); /* * We async punt it if the file wasn't marked NOWAIT, or if the file * doesn't support non-blocking read/write attempts */ - if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { - if (!io_arm_poll_handler(req)) { + if (likely(!ret)) { + if (req->flags & REQ_F_COMPLETE_INLINE) { + struct io_ring_ctx *ctx = req->ctx; + struct io_submit_state *state = &ctx->submit_state; + + state->compl_reqs[state->compl_nr++] = req; + if (state->compl_nr == ARRAY_SIZE(state->compl_reqs)) + io_submit_flush_completions(ctx); + return; + } + + linked_timeout = io_prep_linked_timeout(req); + if (linked_timeout) + io_queue_linked_timeout(linked_timeout); + } else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { + linked_timeout = io_prep_linked_timeout(req); + + switch (io_arm_poll_handler(req)) { + case IO_APOLL_READY: + if (linked_timeout) + 
io_queue_linked_timeout(linked_timeout); + goto issue_sqe; + case IO_APOLL_ABORTED: /* * Queued up for async execution, worker will release * submit reference when the iocb is actually submitted. */ - io_queue_async_work(req); + io_queue_async_work(req, NULL); + break; } if (linked_timeout) io_queue_linked_timeout(linked_timeout); - } else if (likely(!ret)) { - /* drop submission reference */ - req = io_put_req_find_next(req); - if (linked_timeout) - io_queue_linked_timeout(linked_timeout); - - if (req) { - if (!(req->flags & REQ_F_FORCE_ASYNC)) - goto again; - io_queue_async_work(req); - } } else { - /* un-prep timeout, so it'll be killed as any other linked */ - req->flags &= ~REQ_F_LINK_TIMEOUT; - req_set_fail_links(req); - io_put_req(req); - io_req_complete(req, ret); - } - - if (old_creds) - revert_creds(old_creds); -} - -static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, - struct io_comp_state *cs) -{ - int ret; - - ret = io_req_defer(req, sqe); - if (ret) { - if (ret != -EIOCBQUEUED) { -fail_req: - req_set_fail_links(req); - io_put_req(req); - io_req_complete(req, ret); - } - } else if (req->flags & REQ_F_FORCE_ASYNC) { - if (!req->async_data) { - ret = io_req_defer_prep(req, sqe); - if (unlikely(ret)) - goto fail_req; - } - io_queue_async_work(req); - } else { - if (sqe) { - ret = io_req_prep(req, sqe); - if (unlikely(ret)) - goto fail_req; - } - __io_queue_sqe(req, cs); + io_req_complete_failed(req, ret); } } -static inline void io_queue_link_head(struct io_kiocb *req, - struct io_comp_state *cs) +static inline void io_queue_sqe(struct io_kiocb *req) + __must_hold(&req->ctx->uring_lock) { - if (unlikely(req->flags & REQ_F_FAIL_LINK)) { - io_put_req(req); - io_req_complete(req, -ECANCELED); - } else - io_queue_sqe(req, NULL, cs); -} + if (unlikely(req->ctx->drain_active) && io_drain_req(req)) + return; -static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, - struct io_kiocb **link, struct io_comp_state 
*cs) -{ - struct io_ring_ctx *ctx = req->ctx; - int ret; - - /* - * If we already have a head request, queue this one for async - * submittal once the head completes. If we don't have a head but - * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be - * submitted sync once the chain is complete. If none of those - * conditions are true (normal request), then just queue it. - */ - if (*link) { - struct io_kiocb *head = *link; - - /* - * Taking sequential execution of a link, draining both sides - * of the link also fullfils IOSQE_IO_DRAIN semantics for all - * requests in the link. So, it drains the head and the - * next after the link request. The last one is done via - * drain_next flag to persist the effect across calls. - */ - if (req->flags & REQ_F_IO_DRAIN) { - head->flags |= REQ_F_IO_DRAIN; - ctx->drain_next = 1; - } - ret = io_req_defer_prep(req, sqe); - if (unlikely(ret)) { - /* fail even hard links since we don't submit */ - head->flags |= REQ_F_FAIL_LINK; - return ret; - } - trace_io_uring_link(ctx, req, head); - list_add_tail(&req->link_list, &head->link_list); - - /* last request of a link, enqueue the link */ - if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) { - io_queue_link_head(head, cs); - *link = NULL; - } + if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) { + __io_queue_sqe(req); + } else if (req->flags & REQ_F_FAIL) { + io_req_complete_fail_submit(req); } else { - if (unlikely(ctx->drain_next)) { - req->flags |= REQ_F_IO_DRAIN; - ctx->drain_next = 0; - } - if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { - req->flags |= REQ_F_LINK_HEAD; - INIT_LIST_HEAD(&req->link_list); + int ret = io_req_prep_async(req); - ret = io_req_defer_prep(req, sqe); - if (unlikely(ret)) - req->flags |= REQ_F_FAIL_LINK; - *link = req; - } else { - io_queue_sqe(req, sqe, cs); - } + if (unlikely(ret)) + io_req_complete_failed(req, ret); + else + io_queue_async_work(req, NULL); } - - return 0; -} - -/* - * Batched submission is done, 
ensure local IO is flushed out. - */ -static void io_submit_state_end(struct io_submit_state *state) -{ - if (!list_empty(&state->comp.list)) - io_submit_flush_completions(&state->comp); - blk_finish_plug(&state->plug); - io_state_file_put(state); - if (state->free_reqs) - kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs); -} - -/* - * Start submission side cache. - */ -static void io_submit_state_start(struct io_submit_state *state, - struct io_ring_ctx *ctx, unsigned int max_ios) -{ - blk_start_plug(&state->plug); - state->comp.nr = 0; - INIT_LIST_HEAD(&state->comp.list); - state->comp.ctx = ctx; - state->free_reqs = 0; - state->file = NULL; - state->ios_left = max_ios; -} - -static void io_commit_sqring(struct io_ring_ctx *ctx) -{ - struct io_rings *rings = ctx->rings; - - /* - * Ensure any loads from the SQEs are done at this point, - * since once we write the new head, the application could - * write new data to them. - */ - smp_store_release(&rings->sq.head, ctx->cached_sq_head); -} - -/* - * Fetch an sqe, if one is available. Note that sqe_ptr will point to memory - * that is mapped by userspace. This means that care needs to be taken to - * ensure that reads are stable, as we cannot rely on userspace always - * being a good citizen. If members of the sqe are validated and then later - * used, it's important that those reads are done through READ_ONCE() to - * prevent a re-load down the line. - */ -static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx) -{ - u32 *sq_array = ctx->sq_array; - unsigned head; - - /* - * The cached sq head (or cq tail) serves two purposes: - * - * 1) allows us to batch the cost of updating the user visible - * head updates. - * 2) allows the kernel side to track the head on its own, even - * though the application is the one updating it. 
- */ - head = READ_ONCE(sq_array[ctx->cached_sq_head & ctx->sq_mask]); - if (likely(head < ctx->sq_entries)) - return &ctx->sq_sqes[head]; - - /* drop invalid entries */ - ctx->cached_sq_dropped++; - WRITE_ONCE(ctx->rings->sq_dropped, ctx->cached_sq_dropped); - return NULL; -} - -static inline void io_consume_sqe(struct io_ring_ctx *ctx) -{ - ctx->cached_sq_head++; } /* @@ -6565,7 +6926,7 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx, struct io_kiocb *req, unsigned int sqe_flags) { - if (!ctx->restricted) + if (likely(!ctx->restricted)) return true; if (!test_bit(req->opcode, ctx->restrictions.sqe_op)) @@ -6582,382 +6943,459 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx, return true; } -#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ - IOSQE_IO_HARDLINK | IOSQE_ASYNC | \ - IOSQE_BUFFER_SELECT) - static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, - const struct io_uring_sqe *sqe, - struct io_submit_state *state) + const struct io_uring_sqe *sqe) + __must_hold(&ctx->uring_lock) { + struct io_submit_state *state; unsigned int sqe_flags; - int id, ret; + int personality, ret = 0; + /* req is partially pre-initialised, see io_preinit_req() */ req->opcode = READ_ONCE(sqe->opcode); + /* same numerical values with corresponding REQ_F_*, safe to copy */ + req->flags = sqe_flags = READ_ONCE(sqe->flags); req->user_data = READ_ONCE(sqe->user_data); - req->async_data = NULL; req->file = NULL; - req->ctx = ctx; - req->flags = 0; - /* one is dropped after submission, the other at completion */ - refcount_set(&req->refs, 2); + req->fixed_rsrc_refs = NULL; req->task = current; - req->result = 0; - if (unlikely(req->opcode >= IORING_OP_LAST)) - return -EINVAL; - - if (unlikely(io_sq_thread_acquire_mm(ctx, req))) - return -EFAULT; - - sqe_flags = READ_ONCE(sqe->flags); /* enforce forwards compatibility on users */ if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) return -EINVAL; - - if 
(unlikely(!io_check_restriction(ctx, req, sqe_flags))) + if (unlikely(req->opcode >= IORING_OP_LAST)) + return -EINVAL; + if (!io_check_restriction(ctx, req, sqe_flags)) return -EACCES; if ((sqe_flags & IOSQE_BUFFER_SELECT) && !io_op_defs[req->opcode].buffer_select) return -EOPNOTSUPP; + if (unlikely(sqe_flags & IOSQE_IO_DRAIN)) + ctx->drain_active = true; - id = READ_ONCE(sqe->personality); - if (id) { - struct io_identity *iod; - - iod = xa_load(&ctx->personalities, id); - if (unlikely(!iod)) + personality = READ_ONCE(sqe->personality); + if (personality) { + req->creds = xa_load(&ctx->personalities, personality); + if (!req->creds) return -EINVAL; - refcount_inc(&iod->count); + get_cred(req->creds); + req->flags |= REQ_F_CREDS; + } + state = &ctx->submit_state; - __io_req_init_async(req); - get_cred(iod->creds); - req->work.identity = iod; - req->work.flags |= IO_WQ_WORK_CREDS; + /* + * Plug now if we have more than 1 IO left after this, and the target + * is potentially a read/write to block based storage. 
+ */ + if (!state->plug_started && state->ios_left > 1 && + io_op_defs[req->opcode].plug) { + blk_start_plug(&state->plug); + state->plug_started = true; } - /* same numerical values with corresponding REQ_F_*, safe to copy */ - req->flags |= sqe_flags; + if (io_op_defs[req->opcode].needs_file) { + req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd), + (sqe_flags & IOSQE_FIXED_FILE)); + if (unlikely(!req->file)) + ret = -EBADF; + } - if (!io_op_defs[req->opcode].needs_file) - return 0; - - ret = io_req_set_file(state, req, READ_ONCE(sqe->fd)); state->ios_left--; return ret; } -static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) +static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, + const struct io_uring_sqe *sqe) + __must_hold(&ctx->uring_lock) { - struct io_submit_state state; - struct io_kiocb *link = NULL; - int i, submitted = 0; + struct io_submit_link *link = &ctx->submit_state.link; + int ret; - /* if we have a backlog and couldn't flush it all, return BUSY */ - if (test_bit(0, &ctx->sq_check_overflow)) { - if (!__io_cqring_overflow_flush(ctx, false, NULL, NULL)) - return -EBUSY; + ret = io_init_req(ctx, req, sqe); + if (unlikely(ret)) { +fail_req: + /* fail even hard links since we don't submit */ + if (link->head) { + /* + * we can judge a link req is failed or cancelled by if + * REQ_F_FAIL is set, but the head is an exception since + * it may be set REQ_F_FAIL because of other req's failure + * so let's leverage req->result to distinguish if a head + * is set REQ_F_FAIL because of its failure or other req's + * failure so that we can set the correct ret code for it. + * init result here to avoid affecting the normal path. + */ + if (!(link->head->flags & REQ_F_FAIL)) + req_fail_link_node(link->head, -ECANCELED); + } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) { + /* + * the current req is a normal req, we should return + * error and thus break the submittion loop. 
+ */ + io_req_complete_failed(req, ret); + return ret; + } + req_fail_link_node(req, ret); + } else { + ret = io_req_prep(req, sqe); + if (unlikely(ret)) + goto fail_req; } + /* don't need @sqe from now on */ + trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data, + req->flags, true, + ctx->flags & IORING_SETUP_SQPOLL); + + /* + * If we already have a head request, queue this one for async + * submittal once the head completes. If we don't have a head but + * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be + * submitted sync once the chain is complete. If none of those + * conditions are true (normal request), then just queue it. + */ + if (link->head) { + struct io_kiocb *head = link->head; + + if (!(req->flags & REQ_F_FAIL)) { + ret = io_req_prep_async(req); + if (unlikely(ret)) { + req_fail_link_node(req, ret); + if (!(head->flags & REQ_F_FAIL)) + req_fail_link_node(head, -ECANCELED); + } + } + trace_io_uring_link(ctx, req, head); + link->last->link = req; + link->last = req; + + /* last request of a link, enqueue the link */ + if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) { + link->head = NULL; + io_queue_sqe(head); + } + } else { + if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { + link->head = req; + link->last = req; + } else { + io_queue_sqe(req); + } + } + + return 0; +} + +/* + * Batched submission is done, ensure local IO is flushed out. + */ +static void io_submit_state_end(struct io_submit_state *state, + struct io_ring_ctx *ctx) +{ + if (state->link.head) + io_queue_sqe(state->link.head); + if (state->compl_nr) + io_submit_flush_completions(ctx); + if (state->plug_started) + blk_finish_plug(&state->plug); +} + +/* + * Start submission side cache. 
+ */ +static void io_submit_state_start(struct io_submit_state *state, + unsigned int max_ios) +{ + state->plug_started = false; + state->ios_left = max_ios; + /* set only head, no need to init link_last in advance */ + state->link.head = NULL; +} + +static void io_commit_sqring(struct io_ring_ctx *ctx) +{ + struct io_rings *rings = ctx->rings; + + /* + * Ensure any loads from the SQEs are done at this point, + * since once we write the new head, the application could + * write new data to them. + */ + smp_store_release(&rings->sq.head, ctx->cached_sq_head); +} + +/* + * Fetch an sqe, if one is available. Note this returns a pointer to memory + * that is mapped by userspace. This means that care needs to be taken to + * ensure that reads are stable, as we cannot rely on userspace always + * being a good citizen. If members of the sqe are validated and then later + * used, it's important that those reads are done through READ_ONCE() to + * prevent a re-load down the line. + */ +static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx) +{ + unsigned head, mask = ctx->sq_entries - 1; + unsigned sq_idx = ctx->cached_sq_head++ & mask; + + /* + * The cached sq head (or cq tail) serves two purposes: + * + * 1) allows us to batch the cost of updating the user visible + * head updates. + * 2) allows the kernel side to track the head on its own, even + * though the application is the one updating it. 
+ */ + head = READ_ONCE(ctx->sq_array[sq_idx]); + if (likely(head < ctx->sq_entries)) + return &ctx->sq_sqes[head]; + + /* drop invalid entries */ + ctx->cq_extra--; + WRITE_ONCE(ctx->rings->sq_dropped, + READ_ONCE(ctx->rings->sq_dropped) + 1); + return NULL; +} + +static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) + __must_hold(&ctx->uring_lock) +{ + int submitted = 0; + /* make sure SQ entry isn't read before tail */ nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx)); - if (!percpu_ref_tryget_many(&ctx->refs, nr)) return -EAGAIN; + io_get_task_refs(nr); - percpu_counter_add(&current->io_uring->inflight, nr); - refcount_add(nr, &current->usage); - - io_submit_state_start(&state, ctx, nr); - - for (i = 0; i < nr; i++) { + io_submit_state_start(&ctx->submit_state, nr); + while (submitted < nr) { const struct io_uring_sqe *sqe; struct io_kiocb *req; - int err; - sqe = io_get_sqe(ctx); - if (unlikely(!sqe)) { - io_consume_sqe(ctx); - break; - } - req = io_alloc_req(ctx, &state); + req = io_alloc_req(ctx); if (unlikely(!req)) { if (!submitted) submitted = -EAGAIN; break; } - io_consume_sqe(ctx); - /* will complete beyond this point, count as submitted */ - submitted++; - - err = io_init_req(ctx, req, sqe, &state); - if (unlikely(err)) { -fail_req: - io_put_req(req); - io_req_complete(req, err); + sqe = io_get_sqe(ctx); + if (unlikely(!sqe)) { + list_add(&req->inflight_entry, &ctx->submit_state.free_list); break; } - - trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data, - true, io_async_submit(ctx)); - err = io_submit_sqe(req, sqe, &link, &state.comp); - if (err) - goto fail_req; + /* will complete beyond this point, count as submitted */ + submitted++; + if (io_submit_sqe(ctx, req, sqe)) + break; } if (unlikely(submitted != nr)) { int ref_used = (submitted == -EAGAIN) ?
0 : submitted; - struct io_uring_task *tctx = current->io_uring; int unused = nr - ref_used; + current->io_uring->cached_refs += unused; percpu_ref_put_many(&ctx->refs, unused); - percpu_counter_sub(&tctx->inflight, unused); - put_task_struct_many(current, unused); } - if (link) - io_queue_link_head(link, &state.comp); - io_submit_state_end(&state); + io_submit_state_end(&ctx->submit_state, ctx); /* Commit SQ ring head once we've consumed and submitted all SQEs */ io_commit_sqring(ctx); return submitted; } +static inline bool io_sqd_events_pending(struct io_sq_data *sqd) +{ + return READ_ONCE(sqd->state); +} + static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx) { /* Tell userspace we may need a wakeup call */ - spin_lock_irq(&ctx->completion_lock); - ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; - spin_unlock_irq(&ctx->completion_lock); + spin_lock(&ctx->completion_lock); + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP); + spin_unlock(&ctx->completion_lock); } static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx) { - spin_lock_irq(&ctx->completion_lock); - ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; - spin_unlock_irq(&ctx->completion_lock); + spin_lock(&ctx->completion_lock); + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP); + spin_unlock(&ctx->completion_lock); } -static int io_sq_wake_function(struct wait_queue_entry *wqe, unsigned mode, - int sync, void *key) +static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) { - struct io_ring_ctx *ctx = container_of(wqe, struct io_ring_ctx, sqo_wait_entry); - int ret; - - ret = autoremove_wake_function(wqe, mode, sync, key); - if (ret) { - unsigned long flags; - - spin_lock_irqsave(&ctx->completion_lock, flags); - ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; - spin_unlock_irqrestore(&ctx->completion_lock, flags); - } - return ret; -} - -enum sq_ret { - SQT_IDLE = 1, - SQT_SPIN = 2, - SQT_DID_WORK = 4, 
-}; - -static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx, - unsigned long start_jiffies, bool cap_entries) -{ - unsigned long timeout = start_jiffies + ctx->sq_thread_idle; - struct io_sq_data *sqd = ctx->sq_data; unsigned int to_submit; int ret = 0; -again: - if (!list_empty(&ctx->iopoll_list)) { + to_submit = io_sqring_entries(ctx); + /* if we're handling multiple rings, cap submit size for fairness */ + if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE) + to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE; + + if (!list_empty(&ctx->iopoll_list) || to_submit) { unsigned nr_events = 0; + const struct cred *creds = NULL; + + if (ctx->sq_creds != current_cred()) + creds = override_creds(ctx->sq_creds); mutex_lock(&ctx->uring_lock); - if (!list_empty(&ctx->iopoll_list) && !need_resched()) + if (!list_empty(&ctx->iopoll_list)) io_do_iopoll(ctx, &nr_events, 0); + + /* + * Don't submit if refs are dying, good for io_uring_register(), + * but also it is relied upon by io_ring_exit_work() + */ + if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) && + !(ctx->flags & IORING_SETUP_R_DISABLED)) + ret = io_submit_sqes(ctx, to_submit); mutex_unlock(&ctx->uring_lock); + + if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait)) + wake_up(&ctx->sqo_sq_wait); + if (creds) + revert_creds(creds); } - to_submit = io_sqring_entries(ctx); - - /* - * If submit got -EBUSY, flag us as needing the application - * to enter the kernel to reap and flush events. - */ - if (!to_submit || ret == -EBUSY || need_resched()) { - /* - * Drop cur_mm before scheduling, we can't hold it for - * long periods (or over schedule()). Do this before - * adding ourselves to the waitqueue, as the unuse/drop - * may sleep. - */ - io_sq_thread_drop_mm(); - - /* - * We're polling. If we're within the defined idle - * period, then let us spin without work before going - * to sleep. 
The exception is if we got EBUSY doing - * more IO, we should wait for the application to - * reap events and wake us up. - */ - if (!list_empty(&ctx->iopoll_list) || need_resched() || - (!time_after(jiffies, timeout) && ret != -EBUSY && - !percpu_ref_is_dying(&ctx->refs))) - return SQT_SPIN; - - prepare_to_wait(&sqd->wait, &ctx->sqo_wait_entry, - TASK_INTERRUPTIBLE); - - /* - * While doing polled IO, before going to sleep, we need - * to check if there are new reqs added to iopoll_list, - * it is because reqs may have been punted to io worker - * and will be added to iopoll_list later, hence check - * the iopoll_list again. - */ - if ((ctx->flags & IORING_SETUP_IOPOLL) && - !list_empty_careful(&ctx->iopoll_list)) { - finish_wait(&sqd->wait, &ctx->sqo_wait_entry); - goto again; - } - - to_submit = io_sqring_entries(ctx); - if (!to_submit || ret == -EBUSY) - return SQT_IDLE; - } - - finish_wait(&sqd->wait, &ctx->sqo_wait_entry); - io_ring_clear_wakeup_flag(ctx); - - /* if we're handling multiple rings, cap submit size for fairness */ - if (cap_entries && to_submit > 8) - to_submit = 8; - - mutex_lock(&ctx->uring_lock); - if (likely(!percpu_ref_is_dying(&ctx->refs) && !ctx->sqo_dead)) - ret = io_submit_sqes(ctx, to_submit); - mutex_unlock(&ctx->uring_lock); - - if (!io_sqring_full(ctx) && wq_has_sleeper(&ctx->sqo_sq_wait)) - wake_up(&ctx->sqo_sq_wait); - - return SQT_DID_WORK; + return ret; } -static void io_sqd_init_new(struct io_sq_data *sqd) +static void io_sqd_update_thread_idle(struct io_sq_data *sqd) { struct io_ring_ctx *ctx; + unsigned sq_thread_idle = 0; - while (!list_empty(&sqd->ctx_new_list)) { - ctx = list_first_entry(&sqd->ctx_new_list, struct io_ring_ctx, sqd_list); - init_wait(&ctx->sqo_wait_entry); - ctx->sqo_wait_entry.func = io_sq_wake_function; - list_move_tail(&ctx->sqd_list, &sqd->ctx_list); - complete(&ctx->sq_thread_comp); + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) + sq_thread_idle = max(sq_thread_idle, ctx->sq_thread_idle); + 
sqd->sq_thread_idle = sq_thread_idle; +} + +static bool io_sqd_handle_event(struct io_sq_data *sqd) +{ + bool did_sig = false; + struct ksignal ksig; + + if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) || + signal_pending(current)) { + mutex_unlock(&sqd->lock); + if (signal_pending(current)) + did_sig = get_signal(&ksig); + cond_resched(); + mutex_lock(&sqd->lock); } + return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); } static int io_sq_thread(void *data) { - struct cgroup_subsys_state *cur_css = NULL; - const struct cred *old_cred = NULL; struct io_sq_data *sqd = data; struct io_ring_ctx *ctx; - unsigned long start_jiffies; + unsigned long timeout = 0; + char buf[TASK_COMM_LEN]; + DEFINE_WAIT(wait); - start_jiffies = jiffies; - while (!kthread_should_stop()) { - enum sq_ret ret = 0; - bool cap_entries; + snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid); + set_task_comm(current, buf); - /* - * Any changes to the sqd lists are synchronized through the - * kthread parking. This synchronizes the thread vs users, - * the users are synchronized on the sqd->ctx_lock. - */ - if (kthread_should_park()) { - kthread_parkme(); - /* - * When sq thread is unparked, in case the previous park operation - * comes from io_put_sq_data(), which means that sq thread is going - * to be stopped, so here needs to have a check. 
- */ - if (kthread_should_stop()) + if (sqd->sq_cpu != -1) + set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu)); + else + set_cpus_allowed_ptr(current, cpu_online_mask); + current->flags |= PF_NO_SETAFFINITY; + + mutex_lock(&sqd->lock); + while (1) { + bool cap_entries, sqt_spin = false; + + if (io_sqd_events_pending(sqd) || signal_pending(current)) { + if (io_sqd_handle_event(sqd)) break; + timeout = jiffies + sqd->sq_thread_idle; } - if (unlikely(!list_empty(&sqd->ctx_new_list))) - io_sqd_init_new(sqd); - cap_entries = !list_is_singular(&sqd->ctx_list); - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { - if (current->cred != ctx->creds) { - if (old_cred) - revert_creds(old_cred); - old_cred = override_creds(ctx->creds); - } - io_sq_thread_associate_blkcg(ctx, &cur_css); -#ifdef CONFIG_AUDIT - current->loginuid = ctx->loginuid; - current->sessionid = ctx->sessionid; -#endif + int ret = __io_sq_thread(ctx, cap_entries); - ret |= __io_sq_thread(ctx, start_jiffies, cap_entries); + if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list))) + sqt_spin = true; + } + if (io_run_task_work()) + sqt_spin = true; - io_sq_thread_drop_mm(); + if (sqt_spin || !time_after(jiffies, timeout)) { + cond_resched(); + if (sqt_spin) + timeout = jiffies + sqd->sq_thread_idle; + continue; } - if (ret & SQT_SPIN) { - io_run_task_work(); - io_sq_thread_drop_mm(); - cond_resched(); - } else if (ret == SQT_IDLE) { - if (kthread_should_park()) - continue; - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) + prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE); + if (!io_sqd_events_pending(sqd) && !current->task_works) { + bool needs_sched = true; + + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { io_ring_set_wakeup_flag(ctx); - schedule(); - start_jiffies = jiffies; + + if ((ctx->flags & IORING_SETUP_IOPOLL) && + !list_empty_careful(&ctx->iopoll_list)) { + needs_sched = false; + break; + } + if (io_sqring_entries(ctx)) { + needs_sched = false; + break; + } + } + + if 
(needs_sched) { + mutex_unlock(&sqd->lock); + schedule(); + mutex_lock(&sqd->lock); + } list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_clear_wakeup_flag(ctx); } + + finish_wait(&sqd->wait, &wait); + timeout = jiffies + sqd->sq_thread_idle; } + io_uring_cancel_generic(true, sqd); + sqd->thread = NULL; + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) + io_ring_set_wakeup_flag(ctx); io_run_task_work(); - io_sq_thread_drop_mm(); + mutex_unlock(&sqd->lock); - if (cur_css) - io_sq_thread_unassociate_blkcg(); - if (old_cred) - revert_creds(old_cred); - - kthread_parkme(); - - return 0; + complete(&sqd->exited); + do_exit(0); } struct io_wait_queue { struct wait_queue_entry wq; struct io_ring_ctx *ctx; - unsigned to_wait; + unsigned cq_tail; unsigned nr_timeouts; }; static inline bool io_should_wake(struct io_wait_queue *iowq) { struct io_ring_ctx *ctx = iowq->ctx; + int dist = ctx->cached_cq_tail - (int) iowq->cq_tail; /* * Wake up if we have enough events, or if a timeout occurred since we * started waiting. For timeouts, we always want to return to userspace, * regardless of event count. */ - return io_cqring_events(ctx) >= iowq->to_wait || - atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; + return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; } static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, @@ -6970,7 +7408,7 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, * Cannot safely flush overflowed CQEs from here, ensure we wake up * the task, and the next invocation will do it. 
*/ - if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow)) + if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->check_cq_overflow)) return autoremove_wake_function(curr, mode, wake_flags, key); return -1; } @@ -6981,43 +7419,60 @@ static int io_run_task_work_sig(void) return 1; if (!signal_pending(current)) return 0; - if (current->jobctl & JOBCTL_TASK_WORK) { - spin_lock_irq(&current->sighand->siglock); - current->jobctl &= ~JOBCTL_TASK_WORK; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - return 1; - } + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) + return -ERESTARTSYS; return -EINTR; } +/* when returns >0, the caller should retry */ +static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, + struct io_wait_queue *iowq, + ktime_t timeout) +{ + int ret; + + /* make sure we run task_work before checking for signals */ + ret = io_run_task_work_sig(); + if (ret || io_should_wake(iowq)) + return ret; + /* let the caller flush overflows, retry */ + if (test_bit(0, &ctx->check_cq_overflow)) + return 1; + + if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) + return -ETIME; + return 1; +} + /* * Wait until events become available, if we don't already have some. The * application must reap them itself, as they reside on the shared cq ring.
*/ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, - const sigset_t __user *sig, size_t sigsz) + const sigset_t __user *sig, size_t sigsz, + struct __kernel_timespec __user *uts) { - struct io_wait_queue iowq = { - .wq = { - .private = current, - .func = io_wake_function, - .entry = LIST_HEAD_INIT(iowq.wq.entry), - }, - .ctx = ctx, - .to_wait = min_events, - }; + struct io_wait_queue iowq; struct io_rings *rings = ctx->rings; - int ret = 0; + ktime_t timeout = KTIME_MAX; + int ret; do { - io_cqring_overflow_flush(ctx, false, NULL, NULL); + io_cqring_overflow_flush(ctx); if (io_cqring_events(ctx) >= min_events) return 0; if (!io_run_task_work()) break; } while (1); + if (uts) { + struct timespec64 ts; + + if (get_timespec64(&ts, uts)) + return -EFAULT; + timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); + } + if (sig) { #ifdef CONFIG_COMPAT if (in_compat_syscall()) @@ -7031,35 +7486,269 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } + init_waitqueue_func_entry(&iowq.wq, io_wake_function); + iowq.wq.private = current; + INIT_LIST_HEAD(&iowq.wq.entry); + iowq.ctx = ctx; iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); + iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; + trace_io_uring_cqring_wait(ctx, min_events); do { - io_cqring_overflow_flush(ctx, false, NULL, NULL); - prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, + /* if we can't even flush overflow, don't wait for more */ + if (!io_cqring_overflow_flush(ctx)) { + ret = -EBUSY; + break; + } + prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, TASK_INTERRUPTIBLE); - /* make sure we run task_work before checking for signals */ - ret = io_run_task_work_sig(); - if (ret > 0) { - finish_wait(&ctx->wait, &iowq.wq); - continue; - } - else if (ret < 0) - break; - if (io_should_wake(&iowq)) - break; - if (test_bit(0, &ctx->cq_check_overflow)) { - finish_wait(&ctx->wait, &iowq.wq); - continue; - } - schedule(); - } while (1); - 
finish_wait(&ctx->wait, &iowq.wq); + ret = io_cqring_wait_schedule(ctx, &iowq, timeout); + finish_wait(&ctx->cq_wait, &iowq.wq); + cond_resched(); + } while (ret > 0); restore_saved_sigmask_unless(ret == -EINTR); return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; } +static void io_free_page_table(void **table, size_t size) +{ + unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); + + for (i = 0; i < nr_tables; i++) + kfree(table[i]); + kfree(table); +} + +static void **io_alloc_page_table(size_t size) +{ + unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); + size_t init_size = size; + void **table; + + table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT); + if (!table) + return NULL; + + for (i = 0; i < nr_tables; i++) { + unsigned int this_size = min_t(size_t, size, PAGE_SIZE); + + table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT); + if (!table[i]) { + io_free_page_table(table, init_size); + return NULL; + } + size -= this_size; + } + return table; +} + +static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node) +{ + percpu_ref_exit(&ref_node->refs); + kfree(ref_node); +} + +static void io_rsrc_node_ref_zero(struct percpu_ref *ref) +{ + struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs); + struct io_ring_ctx *ctx = node->rsrc_data->ctx; + unsigned long flags; + bool first_add = false; + unsigned long delay = HZ; + + spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); + node->done = true; + + /* if we are mid-quiesce then do not delay */ + if (node->rsrc_data->quiesce) + delay = 0; + + while (!list_empty(&ctx->rsrc_ref_list)) { + node = list_first_entry(&ctx->rsrc_ref_list, + struct io_rsrc_node, node); + /* recycle ref nodes in order */ + if (!node->done) + break; + list_del(&node->node); + first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist); + } + spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); + + if (first_add) + mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); +} + +static struct 
io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx) +{ + struct io_rsrc_node *ref_node; + + ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); + if (!ref_node) + return NULL; + + if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero, + 0, GFP_KERNEL)) { + kfree(ref_node); + return NULL; + } + INIT_LIST_HEAD(&ref_node->node); + INIT_LIST_HEAD(&ref_node->rsrc_list); + ref_node->done = false; + return ref_node; +} + +static void io_rsrc_node_switch(struct io_ring_ctx *ctx, + struct io_rsrc_data *data_to_kill) +{ + WARN_ON_ONCE(!ctx->rsrc_backup_node); + WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node); + + if (data_to_kill) { + struct io_rsrc_node *rsrc_node = ctx->rsrc_node; + + rsrc_node->rsrc_data = data_to_kill; + spin_lock_irq(&ctx->rsrc_ref_lock); + list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list); + spin_unlock_irq(&ctx->rsrc_ref_lock); + + atomic_inc(&data_to_kill->refs); + percpu_ref_kill(&rsrc_node->refs); + ctx->rsrc_node = NULL; + } + + if (!ctx->rsrc_node) { + ctx->rsrc_node = ctx->rsrc_backup_node; + ctx->rsrc_backup_node = NULL; + } +} + +static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx) +{ + if (ctx->rsrc_backup_node) + return 0; + ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx); + return ctx->rsrc_backup_node ? 
0 : -ENOMEM; +} + +static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ctx) +{ + int ret; + + /* As we may drop ->uring_lock, other task may have started quiesce */ + if (data->quiesce) + return -ENXIO; + + data->quiesce = true; + do { + ret = io_rsrc_node_switch_start(ctx); + if (ret) + break; + io_rsrc_node_switch(ctx, data); + + /* kill initial ref, already quiesced if zero */ + if (atomic_dec_and_test(&data->refs)) + break; + mutex_unlock(&ctx->uring_lock); + flush_delayed_work(&ctx->rsrc_put_work); + ret = wait_for_completion_interruptible(&data->done); + if (!ret) { + mutex_lock(&ctx->uring_lock); + if (atomic_read(&data->refs) > 0) { + /* + * it has been revived by another thread while + * we were unlocked + */ + mutex_unlock(&ctx->uring_lock); + } else { + break; + } + } + + atomic_inc(&data->refs); + /* wait for all works potentially completing data->done */ + flush_delayed_work(&ctx->rsrc_put_work); + reinit_completion(&data->done); + + ret = io_run_task_work_sig(); + mutex_lock(&ctx->uring_lock); + } while (ret >= 0); + data->quiesce = false; + + return ret; +} + +static u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx) +{ + unsigned int off = idx & IO_RSRC_TAG_TABLE_MASK; + unsigned int table_idx = idx >> IO_RSRC_TAG_TABLE_SHIFT; + + return &data->tags[table_idx][off]; +} + +static void io_rsrc_data_free(struct io_rsrc_data *data) +{ + size_t size = data->nr * sizeof(data->tags[0][0]); + + if (data->tags) + io_free_page_table((void **)data->tags, size); + kfree(data); +} + +static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put, + u64 __user *utags, unsigned nr, + struct io_rsrc_data **pdata) +{ + struct io_rsrc_data *data; + int ret = -ENOMEM; + unsigned i; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0])); + if (!data->tags) { + kfree(data); + return -ENOMEM; + } + + data->nr = nr; + 
data->ctx = ctx; + data->do_put = do_put; + if (utags) { + ret = -EFAULT; + for (i = 0; i < nr; i++) { + u64 *tag_slot = io_get_tag_slot(data, i); + + if (copy_from_user(tag_slot, &utags[i], + sizeof(*tag_slot))) + goto fail; + } + } + + atomic_set(&data->refs, 1); + init_completion(&data->done); + *pdata = data; + return 0; +fail: + io_rsrc_data_free(data); + return ret; +} + +static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files) +{ + table->files = kvcalloc(nr_files, sizeof(table->files[0]), + GFP_KERNEL_ACCOUNT); + return !!table->files; +} + +static void io_free_file_tables(struct io_file_table *table) +{ + kvfree(table->files); + table->files = NULL; +} + static void __io_sqe_files_unregister(struct io_ring_ctx *ctx) { #if defined(CONFIG_UNIX) @@ -7081,129 +7770,97 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx) fput(file); } #endif -} - -static void io_file_ref_kill(struct percpu_ref *ref) -{ - struct fixed_file_data *data; - - data = container_of(ref, struct fixed_file_data, refs); - complete(&data->done); -} - -static void io_sqe_files_set_node(struct fixed_file_data *file_data, - struct fixed_file_ref_node *ref_node) -{ - spin_lock_bh(&file_data->lock); - file_data->node = ref_node; - list_add_tail(&ref_node->node, &file_data->ref_list); - spin_unlock_bh(&file_data->lock); - percpu_ref_get(&file_data->refs); -} - - -static void io_sqe_files_kill_node(struct fixed_file_data *data) -{ - struct fixed_file_ref_node *ref_node = NULL; - - spin_lock_bh(&data->lock); - ref_node = data->node; - spin_unlock_bh(&data->lock); - if (ref_node) - percpu_ref_kill(&ref_node->refs); -} - -static int io_file_ref_quiesce(struct fixed_file_data *data, - struct io_ring_ctx *ctx) -{ - int ret; - struct fixed_file_ref_node *backup_node; - - if (data->quiesce) - return -ENXIO; - - data->quiesce = true; - do { - backup_node = alloc_fixed_file_ref_node(ctx); - if (!backup_node) - break; - - io_sqe_files_kill_node(data); - 
percpu_ref_kill(&data->refs); - flush_delayed_work(&ctx->file_put_work); - - ret = wait_for_completion_interruptible(&data->done); - if (!ret) - break; - - percpu_ref_resurrect(&data->refs); - io_sqe_files_set_node(data, backup_node); - backup_node = NULL; - reinit_completion(&data->done); - mutex_unlock(&ctx->uring_lock); - ret = io_run_task_work_sig(); - mutex_lock(&ctx->uring_lock); - - if (ret < 0) - break; - backup_node = alloc_fixed_file_ref_node(ctx); - ret = -ENOMEM; - if (!backup_node) - break; - } while (1); - data->quiesce = false; - - if (backup_node) - destroy_fixed_file_ref_node(backup_node); - return ret; + io_free_file_tables(&ctx->file_table); + io_rsrc_data_free(ctx->file_data); + ctx->file_data = NULL; + ctx->nr_user_files = 0; } static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { - struct fixed_file_data *data = ctx->file_data; - unsigned nr_tables, i; + unsigned nr = ctx->nr_user_files; int ret; - /* - * percpu_ref_is_dying() is to stop parallel files unregister - * Since we possibly drop uring lock later in this function to - * run task work. - */ - if (!data || percpu_ref_is_dying(&data->refs)) + if (!ctx->file_data) return -ENXIO; - ret = io_file_ref_quiesce(data, ctx); - if (ret) - return ret; - __io_sqe_files_unregister(ctx); - nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE); - for (i = 0; i < nr_tables; i++) - kfree(data->table[i].files); - kfree(data->table); - percpu_ref_exit(&data->refs); - kfree(data); - ctx->file_data = NULL; + /* + * Quiesce may unlock ->uring_lock, and while it's not held + * prevent new requests using the table. 
+ */ ctx->nr_user_files = 0; - return 0; + ret = io_rsrc_ref_quiesce(ctx->file_data, ctx); + ctx->nr_user_files = nr; + if (!ret) + __io_sqe_files_unregister(ctx); + return ret; +} + +static void io_sq_thread_unpark(struct io_sq_data *sqd) + __releases(&sqd->lock) +{ + WARN_ON_ONCE(sqd->thread == current); + + /* + * Do the dance but not conditional clear_bit() because it'd race with + * other threads incrementing park_pending and setting the bit. + */ + clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); + if (atomic_dec_return(&sqd->park_pending)) + set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); + mutex_unlock(&sqd->lock); +} + +static void io_sq_thread_park(struct io_sq_data *sqd) + __acquires(&sqd->lock) +{ + WARN_ON_ONCE(sqd->thread == current); + + atomic_inc(&sqd->park_pending); + set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); + mutex_lock(&sqd->lock); + if (sqd->thread) + wake_up_process(sqd->thread); +} + +static void io_sq_thread_stop(struct io_sq_data *sqd) +{ + WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); + + set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); + mutex_lock(&sqd->lock); + if (sqd->thread) + wake_up_process(sqd->thread); + mutex_unlock(&sqd->lock); + wait_for_completion(&sqd->exited); } static void io_put_sq_data(struct io_sq_data *sqd) { if (refcount_dec_and_test(&sqd->refs)) { - /* - * The park is a bit of a work-around, without it we get - * warning spews on shutdown with SQPOLL set and affinity - * set to a single CPU. 
- */ - if (sqd->thread) { - kthread_park(sqd->thread); - kthread_stop(sqd->thread); - } + WARN_ON_ONCE(atomic_read(&sqd->park_pending)); + io_sq_thread_stop(sqd); kfree(sqd); } } +static void io_sq_thread_finish(struct io_ring_ctx *ctx) +{ + struct io_sq_data *sqd = ctx->sq_data; + + if (sqd) { + io_sq_thread_park(sqd); + list_del_init(&ctx->sqd_list); + io_sqd_update_thread_idle(sqd); + io_sq_thread_unpark(sqd); + + io_put_sq_data(sqd); + ctx->sq_data = NULL; + } +} + static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p) { struct io_ring_ctx *ctx_attach; @@ -7224,92 +7881,46 @@ static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p) fdput(f); return ERR_PTR(-EINVAL); } + if (sqd->task_tgid != current->tgid) { + fdput(f); + return ERR_PTR(-EPERM); + } refcount_inc(&sqd->refs); fdput(f); return sqd; } -static struct io_sq_data *io_get_sq_data(struct io_uring_params *p) +static struct io_sq_data *io_get_sq_data(struct io_uring_params *p, + bool *attached) { struct io_sq_data *sqd; - if (p->flags & IORING_SETUP_ATTACH_WQ) - return io_attach_sq_data(p); + *attached = false; + if (p->flags & IORING_SETUP_ATTACH_WQ) { + sqd = io_attach_sq_data(p); + if (!IS_ERR(sqd)) { + *attached = true; + return sqd; + } + /* fall through for EPERM case, setup new sqd/task */ + if (PTR_ERR(sqd) != -EPERM) + return sqd; + } sqd = kzalloc(sizeof(*sqd), GFP_KERNEL); if (!sqd) return ERR_PTR(-ENOMEM); + atomic_set(&sqd->park_pending, 0); refcount_set(&sqd->refs, 1); INIT_LIST_HEAD(&sqd->ctx_list); - INIT_LIST_HEAD(&sqd->ctx_new_list); - mutex_init(&sqd->ctx_lock); mutex_init(&sqd->lock); init_waitqueue_head(&sqd->wait); + init_completion(&sqd->exited); return sqd; } -static void io_sq_thread_unpark(struct io_sq_data *sqd) - __releases(&sqd->lock) -{ - if (!sqd->thread) - return; - kthread_unpark(sqd->thread); - mutex_unlock(&sqd->lock); -} - -static void io_sq_thread_park(struct io_sq_data *sqd) - __acquires(&sqd->lock) -{ - if (!sqd->thread) - return; - 
mutex_lock(&sqd->lock); - kthread_park(sqd->thread); -} - -static void io_sq_thread_stop(struct io_ring_ctx *ctx) -{ - struct io_sq_data *sqd = ctx->sq_data; - - if (sqd) { - if (sqd->thread) { - /* - * We may arrive here from the error branch in - * io_sq_offload_create() where the kthread is created - * without being waked up, thus wake it up now to make - * sure the wait will complete. - */ - wake_up_process(sqd->thread); - wait_for_completion(&ctx->sq_thread_comp); - - io_sq_thread_park(sqd); - } - - mutex_lock(&sqd->ctx_lock); - list_del(&ctx->sqd_list); - mutex_unlock(&sqd->ctx_lock); - - if (sqd->thread) { - finish_wait(&sqd->wait, &ctx->sqo_wait_entry); - io_sq_thread_unpark(sqd); - } - - io_put_sq_data(sqd); - ctx->sq_data = NULL; - } -} - -static void io_finish_async(struct io_ring_ctx *ctx) -{ - io_sq_thread_stop(ctx); - - if (ctx->io_wq) { - io_wq_destroy(ctx->io_wq); - ctx->io_wq = NULL; - } -} - #if defined(CONFIG_UNIX) /* * Ensure the UNIX gc is aware of our file set, so we are certain that @@ -7337,7 +7948,7 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) skb->scm_io_uring = 1; nr_files = 0; - fpl->user = get_uid(ctx->user); + fpl->user = get_uid(current_user()); for (i = 0; i < nr; i++) { struct file *file = io_file_from_index(ctx, i + offset); @@ -7413,35 +8024,9 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx) } #endif -static int io_sqe_alloc_file_tables(struct fixed_file_data *file_data, - unsigned nr_tables, unsigned nr_files) -{ - int i; - - for (i = 0; i < nr_tables; i++) { - struct fixed_file_table *table = &file_data->table[i]; - unsigned this_files; - - this_files = min(nr_files, IORING_MAX_FILES_TABLE); - table->files = kcalloc(this_files, sizeof(struct file *), - GFP_KERNEL_ACCOUNT); - if (!table->files) - break; - nr_files -= this_files; - } - - if (i == nr_tables) - return 0; - - for (i = 0; i < nr_tables; i++) { - struct fixed_file_table *table = &file_data->table[i]; - kfree(table->files); - } - 
return 1; -} - -static void io_ring_file_put(struct io_ring_ctx *ctx, struct file *file) +static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) { + struct file *file = prsrc->file; #if defined(CONFIG_UNIX) struct sock *sock = ctx->ring_sock->sk; struct sk_buff_head list, *head = &sock->sk_receive_queue; @@ -7502,117 +8087,61 @@ static void io_ring_file_put(struct io_ring_ctx *ctx, struct file *file) #endif } -struct io_file_put { - struct list_head list; - struct file *file; -}; - -static void __io_file_put_work(struct fixed_file_ref_node *ref_node) +static void __io_rsrc_put_work(struct io_rsrc_node *ref_node) { - struct fixed_file_data *file_data = ref_node->file_data; - struct io_ring_ctx *ctx = file_data->ctx; - struct io_file_put *pfile, *tmp; + struct io_rsrc_data *rsrc_data = ref_node->rsrc_data; + struct io_ring_ctx *ctx = rsrc_data->ctx; + struct io_rsrc_put *prsrc, *tmp; - list_for_each_entry_safe(pfile, tmp, &ref_node->file_list, list) { - list_del(&pfile->list); - io_ring_file_put(ctx, pfile->file); - kfree(pfile); + list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) { + list_del(&prsrc->list); + + if (prsrc->tag) { + bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL; + + io_ring_submit_lock(ctx, lock_ring); + spin_lock(&ctx->completion_lock); + io_fill_cqe_aux(ctx, prsrc->tag, 0, 0); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + io_cqring_ev_posted(ctx); + io_ring_submit_unlock(ctx, lock_ring); + } + + rsrc_data->do_put(ctx, prsrc); + kfree(prsrc); } - percpu_ref_exit(&ref_node->refs); - kfree(ref_node); - percpu_ref_put(&file_data->refs); + io_rsrc_node_destroy(ref_node); + if (atomic_dec_and_test(&rsrc_data->refs)) + complete(&rsrc_data->done); } -static void io_file_put_work(struct work_struct *work) +static void io_rsrc_put_work(struct work_struct *work) { struct io_ring_ctx *ctx; struct llist_node *node; - ctx = container_of(work, struct io_ring_ctx, file_put_work.work); - node = 
llist_del_all(&ctx->file_put_llist); + ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work); + node = llist_del_all(&ctx->rsrc_put_llist); while (node) { - struct fixed_file_ref_node *ref_node; + struct io_rsrc_node *ref_node; struct llist_node *next = node->next; - ref_node = llist_entry(node, struct fixed_file_ref_node, llist); - __io_file_put_work(ref_node); + ref_node = llist_entry(node, struct io_rsrc_node, llist); + __io_rsrc_put_work(ref_node); node = next; } } -static void io_file_data_ref_zero(struct percpu_ref *ref) -{ - struct fixed_file_ref_node *ref_node; - struct fixed_file_data *data; - struct io_ring_ctx *ctx; - bool first_add = false; - int delay = HZ; - - ref_node = container_of(ref, struct fixed_file_ref_node, refs); - data = ref_node->file_data; - ctx = data->ctx; - - spin_lock_bh(&data->lock); - ref_node->done = true; - - while (!list_empty(&data->ref_list)) { - ref_node = list_first_entry(&data->ref_list, - struct fixed_file_ref_node, node); - /* recycle ref nodes in order */ - if (!ref_node->done) - break; - list_del(&ref_node->node); - first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist); - } - spin_unlock_bh(&data->lock); - - if (percpu_ref_is_dying(&data->refs)) - delay = 0; - - if (!delay) - mod_delayed_work(system_wq, &ctx->file_put_work, 0); - else if (first_add) - queue_delayed_work(system_wq, &ctx->file_put_work, delay); -} - -static struct fixed_file_ref_node *alloc_fixed_file_ref_node( - struct io_ring_ctx *ctx) -{ - struct fixed_file_ref_node *ref_node; - - ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); - if (!ref_node) - return NULL; - - if (percpu_ref_init(&ref_node->refs, io_file_data_ref_zero, - 0, GFP_KERNEL)) { - kfree(ref_node); - return NULL; - } - INIT_LIST_HEAD(&ref_node->node); - INIT_LIST_HEAD(&ref_node->file_list); - ref_node->file_data = ctx->file_data; - ref_node->done = false; - return ref_node; -} - -static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node) -{ - 
percpu_ref_exit(&ref_node->refs); - kfree(ref_node); -} - static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, - unsigned nr_args) + unsigned nr_args, u64 __user *tags) { __s32 __user *fds = (__s32 __user *) arg; - unsigned nr_tables, i; struct file *file; - int fd, ret = -ENOMEM; - struct fixed_file_ref_node *ref_node; - struct fixed_file_data *file_data; + int fd, ret; + unsigned i; if (ctx->file_data) return -EBUSY; @@ -7622,44 +8151,34 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, return -EMFILE; if (nr_args > rlimit(RLIMIT_NOFILE)) return -EMFILE; + ret = io_rsrc_node_switch_start(ctx); + if (ret) + return ret; + ret = io_rsrc_data_alloc(ctx, io_rsrc_file_put, tags, nr_args, + &ctx->file_data); + if (ret) + return ret; - file_data = kzalloc(sizeof(*ctx->file_data), GFP_KERNEL_ACCOUNT); - if (!file_data) - return -ENOMEM; - file_data->ctx = ctx; - init_completion(&file_data->done); - INIT_LIST_HEAD(&file_data->ref_list); - spin_lock_init(&file_data->lock); - - nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE); - file_data->table = kcalloc(nr_tables, sizeof(*file_data->table), - GFP_KERNEL_ACCOUNT); - if (!file_data->table) + ret = -ENOMEM; + if (!io_alloc_file_tables(&ctx->file_table, nr_args)) goto out_free; - if (percpu_ref_init(&file_data->refs, io_file_ref_kill, - PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) - goto out_free; - - if (io_sqe_alloc_file_tables(file_data, nr_tables, nr_args)) - goto out_ref; - ctx->file_data = file_data; - for (i = 0; i < nr_args; i++, ctx->nr_user_files++) { - struct fixed_file_table *table; - unsigned index; - if (copy_from_user(&fd, &fds[i], sizeof(fd))) { ret = -EFAULT; goto out_fput; } /* allow sparse sets */ - if (fd == -1) + if (fd == -1) { + ret = -EINVAL; + if (unlikely(*io_get_tag_slot(ctx->file_data, i))) + goto out_fput; continue; + } file = fget(fd); ret = -EBADF; - if (!file) + if (unlikely(!file)) goto out_fput; /* @@ -7673,24 +8192,16 @@ static int 
io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, fput(file); goto out_fput; } - table = &file_data->table[i >> IORING_FILE_TABLE_SHIFT]; - index = i & IORING_FILE_TABLE_MASK; - table->files[index] = file; + io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file); } ret = io_sqe_files_scm(ctx); if (ret) { - io_sqe_files_unregister(ctx); + __io_sqe_files_unregister(ctx); return ret; } - ref_node = alloc_fixed_file_ref_node(ctx); - if (!ref_node) { - io_sqe_files_unregister(ctx); - return -ENOMEM; - } - - io_sqe_files_set_node(file_data, ref_node); + io_rsrc_node_switch(ctx, NULL); return ret; out_fput: for (i = 0; i < ctx->nr_user_files; i++) { @@ -7698,14 +8209,10 @@ out_fput: if (file) fput(file); } - for (i = 0; i < nr_tables; i++) - kfree(file_data->table[i].files); + io_free_file_tables(&ctx->file_table); ctx->nr_user_files = 0; -out_ref: - percpu_ref_exit(&file_data->refs); out_free: - kfree(file_data->table); - kfree(file_data); + io_rsrc_data_free(ctx->file_data); ctx->file_data = NULL; return ret; } @@ -7753,63 +8260,159 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file, #endif } -static int io_queue_file_removal(struct fixed_file_data *data, - struct file *file) +static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, + struct io_rsrc_node *node, void *rsrc) { - struct io_file_put *pfile; - struct fixed_file_ref_node *ref_node = data->node; + u64 *tag_slot = io_get_tag_slot(data, idx); + struct io_rsrc_put *prsrc; - pfile = kzalloc(sizeof(*pfile), GFP_KERNEL); - if (!pfile) + prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); + if (!prsrc) return -ENOMEM; - pfile->file = file; - list_add(&pfile->list, &ref_node->file_list); - + prsrc->tag = *tag_slot; + *tag_slot = 0; + prsrc->rsrc = rsrc; + list_add(&prsrc->list, &node->rsrc_list); return 0; } +static int io_install_fixed_file(struct io_kiocb *req, struct file *file, + unsigned int issue_flags, u32 slot_index) +{ + struct io_ring_ctx *ctx = 
req->ctx; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + bool needs_switch = false; + struct io_fixed_file *file_slot; + int ret = -EBADF; + + io_ring_submit_lock(ctx, !force_nonblock); + if (file->f_op == &io_uring_fops) + goto err; + ret = -ENXIO; + if (!ctx->file_data) + goto err; + ret = -EINVAL; + if (slot_index >= ctx->nr_user_files) + goto err; + + slot_index = array_index_nospec(slot_index, ctx->nr_user_files); + file_slot = io_fixed_file_slot(&ctx->file_table, slot_index); + + if (file_slot->file_ptr) { + struct file *old_file; + + ret = io_rsrc_node_switch_start(ctx); + if (ret) + goto err; + + old_file = (struct file *)(file_slot->file_ptr & FFS_MASK); + ret = io_queue_rsrc_removal(ctx->file_data, slot_index, + ctx->rsrc_node, old_file); + if (ret) + goto err; + file_slot->file_ptr = 0; + needs_switch = true; + } + + *io_get_tag_slot(ctx->file_data, slot_index) = 0; + io_fixed_file_set(file_slot, file); + ret = io_sqe_file_register(ctx, file, slot_index); + if (ret) { + file_slot->file_ptr = 0; + goto err; + } + + ret = 0; +err: + if (needs_switch) + io_rsrc_node_switch(ctx, ctx->file_data); + io_ring_submit_unlock(ctx, !force_nonblock); + if (ret) + fput(file); + return ret; +} + +static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) +{ + unsigned int offset = req->close.file_slot - 1; + struct io_ring_ctx *ctx = req->ctx; + struct io_fixed_file *file_slot; + struct file *file; + int ret; + + io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); + ret = -ENXIO; + if (unlikely(!ctx->file_data)) + goto out; + ret = -EINVAL; + if (offset >= ctx->nr_user_files) + goto out; + ret = io_rsrc_node_switch_start(ctx); + if (ret) + goto out; + + offset = array_index_nospec(offset, ctx->nr_user_files); + file_slot = io_fixed_file_slot(&ctx->file_table, offset); + ret = -EBADF; + if (!file_slot->file_ptr) + goto out; + + file = (struct file *)(file_slot->file_ptr & FFS_MASK); + ret = io_queue_rsrc_removal(ctx->file_data, 
offset, ctx->rsrc_node, file); + if (ret) + goto out; + + file_slot->file_ptr = 0; + io_rsrc_node_switch(ctx, ctx->file_data); + ret = 0; +out: + io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); + return ret; +} + static int __io_sqe_files_update(struct io_ring_ctx *ctx, - struct io_uring_files_update *up, + struct io_uring_rsrc_update2 *up, unsigned nr_args) { - struct fixed_file_data *data = ctx->file_data; - struct fixed_file_ref_node *ref_node; + u64 __user *tags = u64_to_user_ptr(up->tags); + __s32 __user *fds = u64_to_user_ptr(up->data); + struct io_rsrc_data *data = ctx->file_data; + struct io_fixed_file *file_slot; struct file *file; - __s32 __user *fds; - int fd, i, err; - __u32 done; + int fd, i, err = 0; + unsigned int done; bool needs_switch = false; - if (check_add_overflow(up->offset, nr_args, &done)) - return -EOVERFLOW; - if (done > ctx->nr_user_files) + if (!ctx->file_data) + return -ENXIO; + if (up->offset + nr_args > ctx->nr_user_files) return -EINVAL; - ref_node = alloc_fixed_file_ref_node(ctx); - if (!ref_node) - return -ENOMEM; + for (done = 0; done < nr_args; done++) { + u64 tag = 0; - done = 0; - fds = u64_to_user_ptr(up->fds); - while (nr_args) { - struct fixed_file_table *table; - unsigned index; - - err = 0; - if (copy_from_user(&fd, &fds[done], sizeof(fd))) { + if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) || + copy_from_user(&fd, &fds[done], sizeof(fd))) { err = -EFAULT; break; } - i = array_index_nospec(up->offset, ctx->nr_user_files); - table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT]; - index = i & IORING_FILE_TABLE_MASK; - if (table->files[index]) { - file = table->files[index]; - err = io_queue_file_removal(data, file); + if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) { + err = -EINVAL; + break; + } + if (fd == IORING_REGISTER_FILES_SKIP) + continue; + + i = array_index_nospec(up->offset + done, ctx->nr_user_files); + file_slot = io_fixed_file_slot(&ctx->file_table, i); + + if 
(file_slot->file_ptr) { + file = (struct file *)(file_slot->file_ptr & FFS_MASK); + err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file); if (err) break; - table->files[index] = NULL; + file_slot->file_ptr = 0; needs_switch = true; } if (fd != -1) { @@ -7831,106 +8434,61 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - table->files[index] = file; + *io_get_tag_slot(data, i) = tag; + io_fixed_file_set(file_slot, file); err = io_sqe_file_register(ctx, file, i); if (err) { - table->files[index] = NULL; + file_slot->file_ptr = 0; fput(file); break; } } - nr_args--; - done++; - up->offset++; } - if (needs_switch) { - percpu_ref_kill(&data->node->refs); - io_sqe_files_set_node(data, ref_node); - } else - destroy_fixed_file_ref_node(ref_node); - + if (needs_switch) + io_rsrc_node_switch(ctx, data); return done ? done : err; } -static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg, - unsigned nr_args) -{ - struct io_uring_files_update up; - - if (!ctx->file_data) - return -ENXIO; - if (!nr_args) - return -EINVAL; - if (copy_from_user(&up, arg, sizeof(up))) - return -EFAULT; - if (up.resv) - return -EINVAL; - - return __io_sqe_files_update(ctx, &up, nr_args); -} - -static void io_free_work(struct io_wq_work *work) -{ - struct io_kiocb *req = container_of(work, struct io_kiocb, work); - - /* Consider that io_steal_work() relies on this ref */ - io_put_req(req); -} - -static int io_init_wq_offload(struct io_ring_ctx *ctx, - struct io_uring_params *p) +static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, + struct task_struct *task) { + struct io_wq_hash *hash; struct io_wq_data data; - struct fd f; - struct io_ring_ctx *ctx_attach; unsigned int concurrency; - int ret = 0; - data.user = ctx->user; - data.free_work = io_free_work; + mutex_lock(&ctx->uring_lock); + hash = ctx->hash_map; + if (!hash) { + hash = kzalloc(sizeof(*hash), GFP_KERNEL); + if (!hash) { + mutex_unlock(&ctx->uring_lock); + return 
ERR_PTR(-ENOMEM); + } + refcount_set(&hash->refs, 1); + init_waitqueue_head(&hash->wait); + ctx->hash_map = hash; + } + mutex_unlock(&ctx->uring_lock); + + data.hash = hash; + data.task = task; + data.free_work = io_wq_free_work; data.do_work = io_wq_submit_work; - if (!(p->flags & IORING_SETUP_ATTACH_WQ)) { - /* Do QD, or 4 * CPUS, whatever is smallest */ - concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); + /* Do QD, or 4 * CPUS, whatever is smallest */ + concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); - ctx->io_wq = io_wq_create(concurrency, &data); - if (IS_ERR(ctx->io_wq)) { - ret = PTR_ERR(ctx->io_wq); - ctx->io_wq = NULL; - } - return ret; - } - - f = fdget(p->wq_fd); - if (!f.file) - return -EBADF; - - if (f.file->f_op != &io_uring_fops) { - ret = -EINVAL; - goto out_fput; - } - - ctx_attach = f.file->private_data; - /* @io_wq is protected by holding the fd */ - if (!io_wq_get(ctx_attach->io_wq, &data)) { - ret = -EINVAL; - goto out_fput; - } - - ctx->io_wq = ctx_attach->io_wq; -out_fput: - fdput(f); - return ret; + return io_wq_create(concurrency, &data); } -static int io_uring_alloc_task_context(struct task_struct *task) +static int io_uring_alloc_task_context(struct task_struct *task, + struct io_ring_ctx *ctx) { struct io_uring_task *tctx; int ret; - tctx = kmalloc(sizeof(*tctx), GFP_KERNEL); + tctx = kzalloc(sizeof(*tctx), GFP_KERNEL); if (unlikely(!tctx)) return -ENOMEM; @@ -7940,14 +8498,22 @@ static int io_uring_alloc_task_context(struct task_struct *task) return ret; } + tctx->io_wq = io_init_wq_offload(ctx, task); + if (IS_ERR(tctx->io_wq)) { + ret = PTR_ERR(tctx->io_wq); + percpu_counter_destroy(&tctx->inflight); + kfree(tctx); + return ret; + } + xa_init(&tctx->xa); init_waitqueue_head(&tctx->wait); - tctx->last = NULL; atomic_set(&tctx->in_idle, 0); - tctx->sqpoll = false; - io_init_identity(&tctx->__identity); - tctx->identity = &tctx->__identity; + atomic_set(&tctx->inflight_tracked, 0); task->io_uring = tctx; + 
spin_lock_init(&tctx->task_lock); + INIT_WQ_LIST(&tctx->task_list); + init_task_work(&tctx->task_work, tctx_task_work); return 0; } @@ -7956,9 +8522,9 @@ void __io_uring_free(struct task_struct *tsk) struct io_uring_task *tctx = tsk->io_uring; WARN_ON_ONCE(!xa_empty(&tctx->xa)); - WARN_ON_ONCE(refcount_read(&tctx->identity->count) != 1); - if (tctx->identity != &tctx->__identity) - kfree(tctx->identity); + WARN_ON_ONCE(tctx->io_wq); + WARN_ON_ONCE(tctx->cached_refs); + percpu_counter_destroy(&tctx->inflight); kfree(tctx); tsk->io_uring = NULL; @@ -7969,54 +8535,71 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, { int ret; + /* Retain compatibility with failing for an invalid attach attempt */ + if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) == + IORING_SETUP_ATTACH_WQ) { + struct fd f; + + f = fdget(p->wq_fd); + if (!f.file) + return -ENXIO; + if (f.file->f_op != &io_uring_fops) { + fdput(f); + return -EINVAL; + } + fdput(f); + } if (ctx->flags & IORING_SETUP_SQPOLL) { + struct task_struct *tsk; struct io_sq_data *sqd; + bool attached; - ret = -EPERM; - if (!capable(CAP_SYS_ADMIN)) - goto err; - - sqd = io_get_sq_data(p); + sqd = io_get_sq_data(p, &attached); if (IS_ERR(sqd)) { ret = PTR_ERR(sqd); goto err; } + ctx->sq_creds = get_current_cred(); ctx->sq_data = sqd; - io_sq_thread_park(sqd); - mutex_lock(&sqd->ctx_lock); - list_add(&ctx->sqd_list, &sqd->ctx_new_list); - mutex_unlock(&sqd->ctx_lock); - io_sq_thread_unpark(sqd); - ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); if (!ctx->sq_thread_idle) ctx->sq_thread_idle = HZ; - if (sqd->thread) - goto done; + io_sq_thread_park(sqd); + list_add(&ctx->sqd_list, &sqd->ctx_list); + io_sqd_update_thread_idle(sqd); + /* don't attach to a dying SQPOLL thread, would be racy */ + ret = (attached && !sqd->thread) ? 
-ENXIO : 0; + io_sq_thread_unpark(sqd); + + if (ret < 0) + goto err; + if (attached) + return 0; if (p->flags & IORING_SETUP_SQ_AFF) { int cpu = p->sq_thread_cpu; ret = -EINVAL; - if (cpu >= nr_cpu_ids) - goto err; - if (!cpu_online(cpu)) - goto err; - - sqd->thread = kthread_create_on_cpu(io_sq_thread, sqd, - cpu, "io_uring-sq"); + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + goto err_sqpoll; + sqd->sq_cpu = cpu; } else { - sqd->thread = kthread_create(io_sq_thread, sqd, - "io_uring-sq"); + sqd->sq_cpu = -1; } - if (IS_ERR(sqd->thread)) { - ret = PTR_ERR(sqd->thread); - sqd->thread = NULL; - goto err; + + sqd->task_pid = current->pid; + sqd->task_tgid = current->tgid; + tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE); + if (IS_ERR(tsk)) { + ret = PTR_ERR(tsk); + goto err_sqpoll; } - ret = io_uring_alloc_task_context(sqd->thread); + + sqd->thread = tsk; + ret = io_uring_alloc_task_context(tsk, ctx); + wake_up_new_task(tsk); if (ret) goto err; } else if (p->flags & IORING_SETUP_SQ_AFF) { @@ -8025,26 +8608,14 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, goto err; } -done: - ret = io_init_wq_offload(ctx, p); - if (ret) - goto err; - return 0; +err_sqpoll: + complete(&ctx->sq_data->exited); err: - io_finish_async(ctx); + io_sq_thread_finish(ctx); return ret; } -static void io_sq_offload_start(struct io_ring_ctx *ctx) -{ - struct io_sq_data *sqd = ctx->sq_data; - - ctx->flags &= ~IORING_SETUP_R_DISABLED; - if ((ctx->flags & IORING_SETUP_SQPOLL) && sqd && sqd->thread) - wake_up_process(sqd->thread); -} - static inline void __io_unaccount_mem(struct user_struct *user, unsigned long nr_pages) { @@ -8070,37 +8641,27 @@ static inline int __io_account_mem(struct user_struct *user, return 0; } -static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages, - enum io_mem_account acct) +static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) { - if (ctx->limit_mem) + if (ctx->user) __io_unaccount_mem(ctx->user, 
nr_pages); - if (ctx->mm_account) { - if (acct == ACCT_LOCKED) - ctx->mm_account->locked_vm -= nr_pages; - else if (acct == ACCT_PINNED) - atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); - } + if (ctx->mm_account) + atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); } -static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages, - enum io_mem_account acct) +static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) { int ret; - if (ctx->limit_mem) { + if (ctx->user) { ret = __io_account_mem(ctx->user, nr_pages); if (ret) return ret; } - if (ctx->mm_account) { - if (acct == ACCT_LOCKED) - ctx->mm_account->locked_vm += nr_pages; - else if (acct == ACCT_PINNED) - atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); - } + if (ctx->mm_account) + atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); return 0; } @@ -8119,10 +8680,9 @@ static void io_mem_free(void *ptr) static void *io_mem_alloc(size_t size) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP | - __GFP_NORETRY; + gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; - return (void *) __get_free_pages(gfp_flags, get_order(size)); + return (void *) __get_free_pages(gfp, get_order(size)); } static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries, @@ -8154,41 +8714,58 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries, return off; } -static unsigned long ring_pages(unsigned sq_entries, unsigned cq_entries) +static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot) { - size_t pages; + struct io_mapped_ubuf *imu = *slot; + unsigned int i; - pages = (size_t)1 << get_order( - rings_size(sq_entries, cq_entries, NULL)); - pages += (size_t)1 << get_order( - array_size(sizeof(struct io_uring_sqe), sq_entries)); - - return pages; + if (imu != ctx->dummy_ubuf) { + for (i = 0; i < imu->nr_bvecs; i++) + unpin_user_page(imu->bvec[i].bv_page); + if (imu->acct_pages) + 
io_unaccount_mem(ctx, imu->acct_pages); + kvfree(imu); + } + *slot = NULL; } -static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx) +static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) { - int i, j; + io_buffer_unmap(ctx, &prsrc->buf); + prsrc->buf = NULL; +} - if (!ctx->user_bufs) +static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx) +{ + unsigned int i; + + for (i = 0; i < ctx->nr_user_bufs; i++) + io_buffer_unmap(ctx, &ctx->user_bufs[i]); + kfree(ctx->user_bufs); + io_rsrc_data_free(ctx->buf_data); + ctx->user_bufs = NULL; + ctx->buf_data = NULL; + ctx->nr_user_bufs = 0; +} + +static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx) +{ + unsigned nr = ctx->nr_user_bufs; + int ret; + + if (!ctx->buf_data) return -ENXIO; - for (i = 0; i < ctx->nr_user_bufs; i++) { - struct io_mapped_ubuf *imu = &ctx->user_bufs[i]; - - for (j = 0; j < imu->nr_bvecs; j++) - unpin_user_page(imu->bvec[j].bv_page); - - if (imu->acct_pages) - io_unaccount_mem(ctx, imu->acct_pages, ACCT_PINNED); - kvfree(imu->bvec); - imu->nr_bvecs = 0; - } - - kfree(ctx->user_bufs); - ctx->user_bufs = NULL; + /* + * Quiesce may unlock ->uring_lock, and while it's not held + * prevent new requests using the table. 
+ */ ctx->nr_user_bufs = 0; - return 0; + ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx); + ctx->nr_user_bufs = nr; + if (!ret) + __io_sqe_buffers_unregister(ctx); + return ret; } static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst, @@ -8240,7 +8817,7 @@ static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages, /* check previously registered pages */ for (i = 0; i < ctx->nr_user_bufs; i++) { - struct io_mapped_ubuf *imu = &ctx->user_bufs[i]; + struct io_mapped_ubuf *imu = ctx->user_bufs[i]; for (j = 0; j < imu->nr_bvecs; j++) { if (!PageCompound(imu->bvec[j].bv_page)) @@ -8259,6 +8836,7 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages, { int i, ret; + imu->acct_pages = 0; for (i = 0; i < nr_pages; i++) { if (!PageCompound(pages[i])) { imu->acct_pages++; @@ -8278,149 +8856,254 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages, if (!imu->acct_pages) return 0; - ret = io_account_mem(ctx, imu->acct_pages, ACCT_PINNED); + ret = io_account_mem(ctx, imu->acct_pages); if (ret) imu->acct_pages = 0; return ret; } -static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, - unsigned nr_args) +static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, + struct io_mapped_ubuf **pimu, + struct page **last_hpage) { + struct io_mapped_ubuf *imu = NULL; struct vm_area_struct **vmas = NULL; struct page **pages = NULL; + unsigned long off, start, end, ubuf; + size_t size; + int ret, pret, nr_pages, i; + + if (!iov->iov_base) { + *pimu = ctx->dummy_ubuf; + return 0; + } + + ubuf = (unsigned long) iov->iov_base; + end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; + start = ubuf >> PAGE_SHIFT; + nr_pages = end - start; + + *pimu = NULL; + ret = -ENOMEM; + + pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto done; + + vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *), + GFP_KERNEL); + if 
(!vmas) + goto done; + + imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); + if (!imu) + goto done; + + ret = 0; + mmap_read_lock(current->mm); + pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM, + pages, vmas); + if (pret == nr_pages) { + /* don't support file backed memory */ + for (i = 0; i < nr_pages; i++) { + struct vm_area_struct *vma = vmas[i]; + + if (vma_is_shmem(vma)) + continue; + if (vma->vm_file && + !is_file_hugepages(vma->vm_file)) { + ret = -EOPNOTSUPP; + break; + } + } + } else { + ret = pret < 0 ? pret : -EFAULT; + } + mmap_read_unlock(current->mm); + if (ret) { + /* + * if we did partial map, or found file backed vmas, + * release any pages we did get + */ + if (pret > 0) + unpin_user_pages(pages, pret); + goto done; + } + + ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage); + if (ret) { + unpin_user_pages(pages, pret); + goto done; + } + + off = ubuf & ~PAGE_MASK; + size = iov->iov_len; + for (i = 0; i < nr_pages; i++) { + size_t vec_len; + + vec_len = min_t(size_t, size, PAGE_SIZE - off); + imu->bvec[i].bv_page = pages[i]; + imu->bvec[i].bv_len = vec_len; + imu->bvec[i].bv_offset = off; + off = 0; + size -= vec_len; + } + /* store original address for later verification */ + imu->ubuf = ubuf; + imu->ubuf_end = ubuf + iov->iov_len; + imu->nr_bvecs = nr_pages; + *pimu = imu; + ret = 0; +done: + if (ret) + kvfree(imu); + kvfree(pages); + kvfree(vmas); + return ret; +} + +static int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args) +{ + ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL); + return ctx->user_bufs ? 0 : -ENOMEM; +} + +static int io_buffer_validate(struct iovec *iov) +{ + unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1); + + /* + * Don't impose further limits on the size and buffer + * constraints here, we'll -EINVAL later when IO is + * submitted if they are wrong. + */ + if (!iov->iov_base) + return iov->iov_len ? 
-EFAULT : 0; + if (!iov->iov_len) + return -EFAULT; + + /* arbitrary limit, but we need something */ + if (iov->iov_len > SZ_1G) + return -EFAULT; + + if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp)) + return -EOVERFLOW; + + return 0; +} + +static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, + unsigned int nr_args, u64 __user *tags) +{ struct page *last_hpage = NULL; - int i, j, got_pages = 0; - int ret = -EINVAL; + struct io_rsrc_data *data; + int i, ret; + struct iovec iov; if (ctx->user_bufs) return -EBUSY; - if (!nr_args || nr_args > UIO_MAXIOV) + if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS) return -EINVAL; + ret = io_rsrc_node_switch_start(ctx); + if (ret) + return ret; + ret = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, tags, nr_args, &data); + if (ret) + return ret; + ret = io_buffers_map_alloc(ctx, nr_args); + if (ret) { + io_rsrc_data_free(data); + return ret; + } - ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf), - GFP_KERNEL); - if (!ctx->user_bufs) - return -ENOMEM; - - for (i = 0; i < nr_args; i++) { - struct io_mapped_ubuf *imu = &ctx->user_bufs[i]; - unsigned long off, start, end, ubuf; - int pret, nr_pages; - struct iovec iov; - size_t size; - + for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) { ret = io_copy_iov(ctx, &iov, arg, i); if (ret) - goto err; - - /* - * Don't impose further limits on the size and buffer - * constraints here, we'll -EINVAL later when IO is - * submitted if they are wrong. 
- */ - ret = -EFAULT; - if (!iov.iov_base || !iov.iov_len) - goto err; - - /* arbitrary limit, but we need something */ - if (iov.iov_len > SZ_1G) - goto err; - - ubuf = (unsigned long) iov.iov_base; - end = (ubuf + iov.iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; - start = ubuf >> PAGE_SHIFT; - nr_pages = end - start; - - ret = 0; - if (!pages || nr_pages > got_pages) { - kvfree(vmas); - kvfree(pages); - pages = kvmalloc_array(nr_pages, sizeof(struct page *), - GFP_KERNEL); - vmas = kvmalloc_array(nr_pages, - sizeof(struct vm_area_struct *), - GFP_KERNEL); - if (!pages || !vmas) { - ret = -ENOMEM; - goto err; - } - got_pages = nr_pages; + break; + ret = io_buffer_validate(&iov); + if (ret) + break; + if (!iov.iov_base && *io_get_tag_slot(data, i)) { + ret = -EINVAL; + break; } - imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec), - GFP_KERNEL); - ret = -ENOMEM; - if (!imu->bvec) - goto err; - - ret = 0; - mmap_read_lock(current->mm); - pret = pin_user_pages(ubuf, nr_pages, - FOLL_WRITE | FOLL_LONGTERM, - pages, vmas); - if (pret == nr_pages) { - /* don't support file backed memory */ - for (j = 0; j < nr_pages; j++) { - struct vm_area_struct *vma = vmas[j]; - - if (vma->vm_file && - !is_file_hugepages(vma->vm_file)) { - ret = -EOPNOTSUPP; - break; - } - } - } else { - ret = pret < 0 ? 
pret : -EFAULT; - } - mmap_read_unlock(current->mm); - if (ret) { - /* - * if we did partial map, or found file backed vmas, - * release any pages we did get - */ - if (pret > 0) - unpin_user_pages(pages, pret); - kvfree(imu->bvec); - goto err; - } - - ret = io_buffer_account_pin(ctx, pages, pret, imu, &last_hpage); - if (ret) { - unpin_user_pages(pages, pret); - kvfree(imu->bvec); - goto err; - } - - off = ubuf & ~PAGE_MASK; - size = iov.iov_len; - for (j = 0; j < nr_pages; j++) { - size_t vec_len; - - vec_len = min_t(size_t, size, PAGE_SIZE - off); - imu->bvec[j].bv_page = pages[j]; - imu->bvec[j].bv_len = vec_len; - imu->bvec[j].bv_offset = off; - off = 0; - size -= vec_len; - } - /* store original address for later verification */ - imu->ubuf = ubuf; - imu->len = iov.iov_len; - imu->nr_bvecs = nr_pages; - - ctx->nr_user_bufs++; + ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i], + &last_hpage); + if (ret) + break; } - kvfree(pages); - kvfree(vmas); - return 0; -err: - kvfree(pages); - kvfree(vmas); - io_sqe_buffer_unregister(ctx); + + WARN_ON_ONCE(ctx->buf_data); + + ctx->buf_data = data; + if (ret) + __io_sqe_buffers_unregister(ctx); + else + io_rsrc_node_switch(ctx, NULL); return ret; } +static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, + struct io_uring_rsrc_update2 *up, + unsigned int nr_args) +{ + u64 __user *tags = u64_to_user_ptr(up->tags); + struct iovec iov, __user *iovs = u64_to_user_ptr(up->data); + struct page *last_hpage = NULL; + bool needs_switch = false; + __u32 done; + int i, err; + + if (!ctx->buf_data) + return -ENXIO; + if (up->offset + nr_args > ctx->nr_user_bufs) + return -EINVAL; + + for (done = 0; done < nr_args; done++) { + struct io_mapped_ubuf *imu; + int offset = up->offset + done; + u64 tag = 0; + + err = io_copy_iov(ctx, &iov, iovs, done); + if (err) + break; + if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) { + err = -EFAULT; + break; + } + err = io_buffer_validate(&iov); + if (err) + break; + if 
(!iov.iov_base && tag) { + err = -EINVAL; + break; + } + err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage); + if (err) + break; + + i = array_index_nospec(offset, ctx->nr_user_bufs); + if (ctx->user_bufs[i] != ctx->dummy_ubuf) { + err = io_queue_rsrc_removal(ctx->buf_data, i, + ctx->rsrc_node, ctx->user_bufs[i]); + if (unlikely(err)) { + io_buffer_unmap(ctx, &imu); + break; + } + ctx->user_bufs[i] = NULL; + needs_switch = true; + } + + ctx->user_bufs[i] = imu; + *io_get_tag_slot(ctx->buf_data, offset) = tag; + } + + if (needs_switch) + io_rsrc_node_switch(ctx, ctx->buf_data); + return done ? done : err; +} + static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg) { __s32 __user *fds = arg; @@ -8435,6 +9118,7 @@ static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg) ctx->cq_ev_fd = eventfd_ctx_fdget(fd); if (IS_ERR(ctx->cq_ev_fd)) { int ret = PTR_ERR(ctx->cq_ev_fd); + ctx->cq_ev_fd = NULL; return ret; } @@ -8462,26 +9146,68 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx) __io_remove_buffers(ctx, buf, index, -1U); } -static void io_ring_ctx_free(struct io_ring_ctx *ctx) +static void io_req_cache_free(struct list_head *list) { - io_finish_async(ctx); - io_sqe_buffer_unregister(ctx); + struct io_kiocb *req, *nxt; - if (ctx->sqo_task) { - put_task_struct(ctx->sqo_task); - ctx->sqo_task = NULL; + list_for_each_entry_safe(req, nxt, list, inflight_entry) { + list_del(&req->inflight_entry); + kmem_cache_free(req_cachep, req); } +} -#ifdef CONFIG_BLK_CGROUP - if (ctx->sqo_blkcg_css) - css_put(ctx->sqo_blkcg_css); -#endif +static void io_req_caches_free(struct io_ring_ctx *ctx) +{ + struct io_submit_state *state = &ctx->submit_state; mutex_lock(&ctx->uring_lock); - io_sqe_files_unregister(ctx); + + if (state->free_reqs) { + kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs); + state->free_reqs = 0; + } + + io_flush_cached_locked_reqs(ctx, state); + io_req_cache_free(&state->free_list); + 
mutex_unlock(&ctx->uring_lock); +} + +static void io_wait_rsrc_data(struct io_rsrc_data *data) +{ + if (data && !atomic_dec_and_test(&data->refs)) + wait_for_completion(&data->done); +} + +static void io_ring_ctx_free(struct io_ring_ctx *ctx) +{ + io_sq_thread_finish(ctx); + + /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ + io_wait_rsrc_data(ctx->buf_data); + io_wait_rsrc_data(ctx->file_data); + + mutex_lock(&ctx->uring_lock); + if (ctx->buf_data) + __io_sqe_buffers_unregister(ctx); + if (ctx->file_data) + __io_sqe_files_unregister(ctx); + if (ctx->rings) + __io_cqring_overflow_flush(ctx, true); mutex_unlock(&ctx->uring_lock); io_eventfd_unregister(ctx); io_destroy_buffers(ctx); + if (ctx->sq_creds) + put_cred(ctx->sq_creds); + + /* there are no registered resources left, nobody uses it */ + if (ctx->rsrc_node) + io_rsrc_node_destroy(ctx->rsrc_node); + if (ctx->rsrc_backup_node) + io_rsrc_node_destroy(ctx->rsrc_backup_node); + flush_delayed_work(&ctx->rsrc_put_work); + + WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); + WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); #if defined(CONFIG_UNIX) if (ctx->ring_sock) { @@ -8489,6 +9215,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) sock_release(ctx->ring_sock); } #endif + WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); if (ctx->mm_account) { mmdrop(ctx->mm_account); @@ -8500,9 +9227,11 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) percpu_ref_exit(&ctx->refs); free_uid(ctx->user); - put_cred(ctx->creds); + io_req_caches_free(ctx); + if (ctx->hash_map) + io_wq_put_hash(ctx->hash_map); kfree(ctx->cancel_hash); - kmem_cache_free(req_cachep, ctx->fallback_req); + kfree(ctx->dummy_ubuf); kfree(ctx); } @@ -8511,7 +9240,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) struct io_ring_ctx *ctx = file->private_data; __poll_t mask = 0; - poll_wait(file, &ctx->cq_wait, wait); + poll_wait(file, &ctx->poll_wait, wait); /* * synchronizes with barrier from 
wq_has_sleeper call in * io_commit_cqring @@ -8533,49 +9262,46 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) * Users may get EPOLLIN meanwhile seeing nothing in cqring, this * pushs them to do the flush. */ - if (io_cqring_events(ctx) || test_bit(0, &ctx->cq_check_overflow)) + if (io_cqring_events(ctx) || test_bit(0, &ctx->check_cq_overflow)) mask |= EPOLLIN | EPOLLRDNORM; return mask; } -static int io_uring_fasync(int fd, struct file *file, int on) -{ - struct io_ring_ctx *ctx = file->private_data; - - return fasync_helper(fd, file, on, &ctx->cq_fasync); -} - static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) { - struct io_identity *iod; + const struct cred *creds; - iod = xa_erase(&ctx->personalities, id); - if (iod) { - put_cred(iod->creds); - if (refcount_dec_and_test(&iod->count)) - kfree(iod); + creds = xa_erase(&ctx->personalities, id); + if (creds) { + put_cred(creds); return 0; } return -EINVAL; } -static void io_ring_exit_work(struct work_struct *work) -{ - struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, - exit_work); +struct io_tctx_exit { + struct callback_head task_work; + struct completion completion; + struct io_ring_ctx *ctx; +}; +static void io_tctx_exit_cb(struct callback_head *cb) +{ + struct io_uring_task *tctx = current->io_uring; + struct io_tctx_exit *work; + + work = container_of(cb, struct io_tctx_exit, task_work); /* - * If we're doing polled IO and end up having requests being - * submitted async (out-of-line), then completions can come in while - * we're waiting for refs to drop. We need to reap these manually, - * as nobody else will be looking for them. + * When @in_idle, we're in cancellation and it's racy to remove the + * node. It'll be removed by the end of cancellation, just ignore it. + * tctx can be NULL if the queueing of this task_work raced with + * work cancelation off the exec path. 
*/ - do { - io_iopoll_try_reap_events(ctx); - } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); - io_ring_ctx_free(ctx); + if (tctx && !atomic_read(&tctx->in_idle)) + io_uring_del_tctx_node((unsigned long)work->ctx); + complete(&work->completion); } static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) @@ -8585,41 +9311,116 @@ static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) return req->ctx == data; } +static void io_ring_exit_work(struct work_struct *work) +{ + struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work); + unsigned long timeout = jiffies + HZ * 60 * 5; + unsigned long interval = HZ / 20; + struct io_tctx_exit exit; + struct io_tctx_node *node; + int ret; + + /* + * If we're doing polled IO and end up having requests being + * submitted async (out-of-line), then completions can come in while + * we're waiting for refs to drop. We need to reap these manually, + * as nobody else will be looking for them. + */ + do { + io_uring_try_cancel_requests(ctx, NULL, true); + if (ctx->sq_data) { + struct io_sq_data *sqd = ctx->sq_data; + struct task_struct *tsk; + + io_sq_thread_park(sqd); + tsk = sqd->thread; + if (tsk && tsk->io_uring && tsk->io_uring->io_wq) + io_wq_cancel_cb(tsk->io_uring->io_wq, + io_cancel_ctx_cb, ctx, true); + io_sq_thread_unpark(sqd); + } + + if (WARN_ON_ONCE(time_after(jiffies, timeout))) { + /* there is little hope left, don't run it too often */ + interval = HZ * 60; + } + } while (!wait_for_completion_timeout(&ctx->ref_comp, interval)); + + init_completion(&exit.completion); + init_task_work(&exit.task_work, io_tctx_exit_cb); + exit.ctx = ctx; + /* + * Some may use context even when all refs and requests have been put, + * and they are free to do so while still holding uring_lock or + * completion_lock, see io_req_task_submit(). Apart from other work, + * this lock/unlock section also waits them to finish. 
+ */ + mutex_lock(&ctx->uring_lock); + while (!list_empty(&ctx->tctx_list)) { + WARN_ON_ONCE(time_after(jiffies, timeout)); + + node = list_first_entry(&ctx->tctx_list, struct io_tctx_node, + ctx_node); + /* don't spin on a single task if cancellation failed */ + list_rotate_left(&ctx->tctx_list); + ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL); + if (WARN_ON_ONCE(ret)) + continue; + wake_up_process(node->task); + + mutex_unlock(&ctx->uring_lock); + wait_for_completion(&exit.completion); + mutex_lock(&ctx->uring_lock); + } + mutex_unlock(&ctx->uring_lock); + spin_lock(&ctx->completion_lock); + spin_unlock(&ctx->completion_lock); + + io_ring_ctx_free(ctx); +} + +/* Returns true if we found and killed one or more timeouts */ +static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, + bool cancel_all) +{ + struct io_kiocb *req, *tmp; + int canceled = 0; + + spin_lock(&ctx->completion_lock); + spin_lock_irq(&ctx->timeout_lock); + list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { + if (io_match_task(req, tsk, cancel_all)) { + io_kill_timeout(req, -ECANCELED); + canceled++; + } + } + spin_unlock_irq(&ctx->timeout_lock); + if (canceled != 0) + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (canceled != 0) + io_cqring_ev_posted(ctx); + return canceled != 0; +} + static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { unsigned long index; - struct io_identify *iod; + struct creds *creds; mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); - /* if force is set, the ring is going away. 
always drop after that */ - - if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead)) - ctx->sqo_dead = 1; - - ctx->cq_overflow_flushed = 1; if (ctx->rings) - __io_cqring_overflow_flush(ctx, true, NULL, NULL); + __io_cqring_overflow_flush(ctx, true); + xa_for_each(&ctx->personalities, index, creds) + io_unregister_personality(ctx, index); mutex_unlock(&ctx->uring_lock); - io_kill_timeouts(ctx, NULL, NULL); - io_poll_remove_all(ctx, NULL, NULL); - - if (ctx->io_wq) - io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true); + io_kill_timeouts(ctx, NULL, true); + io_poll_remove_all(ctx, NULL, true); /* if we failed setting up the ctx, we might not have any rings */ io_iopoll_try_reap_events(ctx); - xa_for_each(&ctx->personalities, index, iod) - io_unregister_personality(ctx, index); - - /* - * Do this upfront, so we won't have a grace period where the ring - * is closed but resources aren't reaped yet. This can cause - * spurious failure in setting up a new ring. - */ - io_unaccount_mem(ctx, ring_pages(ctx->sq_entries, ctx->cq_entries), - ACCT_LOCKED); INIT_WORK(&ctx->exit_work, io_ring_exit_work); /* @@ -8642,352 +9443,290 @@ static int io_uring_release(struct inode *inode, struct file *file) struct io_task_cancel { struct task_struct *task; - struct files_struct *files; + bool all; }; static bool io_cancel_task_cb(struct io_wq_work *work, void *data) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); struct io_task_cancel *cancel = data; - bool ret; - if (cancel->files && (req->flags & REQ_F_LINK_TIMEOUT)) { - unsigned long flags; - struct io_ring_ctx *ctx = req->ctx; - - /* protect against races with linked timeouts */ - spin_lock_irqsave(&ctx->completion_lock, flags); - ret = io_match_task(req, cancel->task, cancel->files); - spin_unlock_irqrestore(&ctx->completion_lock, flags); - } else { - ret = io_match_task(req, cancel->task, cancel->files); - } - return ret; + return io_match_task_safe(req, cancel->task, cancel->all); } 
-static void io_cancel_defer_files(struct io_ring_ctx *ctx, - struct task_struct *task, - struct files_struct *files) +static bool io_cancel_defer_files(struct io_ring_ctx *ctx, + struct task_struct *task, bool cancel_all) { - struct io_defer_entry *de = NULL; + struct io_defer_entry *de; LIST_HEAD(list); - spin_lock_irq(&ctx->completion_lock); + spin_lock(&ctx->completion_lock); list_for_each_entry_reverse(de, &ctx->defer_list, list) { - if (io_match_task(de->req, task, files)) { + if (io_match_task_safe(de->req, task, cancel_all)) { list_cut_position(&list, &ctx->defer_list, &de->list); break; } } - spin_unlock_irq(&ctx->completion_lock); + spin_unlock(&ctx->completion_lock); + if (list_empty(&list)) + return false; while (!list_empty(&list)) { de = list_first_entry(&list, struct io_defer_entry, list); list_del_init(&de->list); - req_set_fail_links(de->req); - io_put_req(de->req); - io_req_complete(de->req, -ECANCELED); + io_req_complete_failed(de->req, -ECANCELED); kfree(de); } + return true; } -static int io_uring_count_inflight(struct io_ring_ctx *ctx, - struct task_struct *task, - struct files_struct *files) +static bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx) { - struct io_kiocb *req; - int cnt = 0; + struct io_tctx_node *node; + enum io_wq_cancel cret; + bool ret = false; - spin_lock_irq(&ctx->inflight_lock); - list_for_each_entry(req, &ctx->inflight_list, inflight_entry) - cnt += io_match_task(req, task, files); - spin_unlock_irq(&ctx->inflight_lock); - return cnt; -} + mutex_lock(&ctx->uring_lock); + list_for_each_entry(node, &ctx->tctx_list, ctx_node) { + struct io_uring_task *tctx = node->task->io_uring; -static void io_uring_cancel_files(struct io_ring_ctx *ctx, - struct task_struct *task, - struct files_struct *files) -{ - while (!list_empty_careful(&ctx->inflight_list)) { - struct io_task_cancel cancel = { .task = task, .files = files }; - DEFINE_WAIT(wait); - int inflight; - - inflight = io_uring_count_inflight(ctx, task, files); - if 
(!inflight) - break; - - io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true); - io_poll_remove_all(ctx, task, files); - io_kill_timeouts(ctx, task, files); - /* cancellations _may_ trigger task work */ - io_run_task_work(); - - prepare_to_wait(&task->io_uring->wait, &wait, - TASK_UNINTERRUPTIBLE); - if (inflight == io_uring_count_inflight(ctx, task, files)) - schedule(); - finish_wait(&task->io_uring->wait, &wait); + /* + * io_wq will stay alive while we hold uring_lock, because it's + * killed after ctx nodes, which requires to take the lock. + */ + if (!tctx || !tctx->io_wq) + continue; + cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true); + ret |= (cret != IO_WQ_CANCEL_NOTFOUND); } + mutex_unlock(&ctx->uring_lock); + + return ret; } -static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, - struct task_struct *task) +static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, + struct task_struct *task, + bool cancel_all) { + struct io_task_cancel cancel = { .task = task, .all = cancel_all, }; + struct io_uring_task *tctx = task ? task->io_uring : NULL; + while (1) { - struct io_task_cancel cancel = { .task = task, .files = NULL, }; enum io_wq_cancel cret; bool ret = false; - cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true); - if (cret != IO_WQ_CANCEL_NOTFOUND) - ret = true; + if (!task) { + ret |= io_uring_try_cancel_iowq(ctx); + } else if (tctx && tctx->io_wq) { + /* + * Cancels requests of all rings, not only @ctx, but + * it's fine as the task is in exit/exec. 
+ */ + cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb, + &cancel, true); + ret |= (cret != IO_WQ_CANCEL_NOTFOUND); + } /* SQPOLL thread does its own polling */ - if (!(ctx->flags & IORING_SETUP_SQPOLL)) { + if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) || + (ctx->sq_data && ctx->sq_data->thread == current)) { while (!list_empty_careful(&ctx->iopoll_list)) { io_iopoll_try_reap_events(ctx); ret = true; } } - ret |= io_poll_remove_all(ctx, task, NULL); - ret |= io_kill_timeouts(ctx, task, NULL); + ret |= io_cancel_defer_files(ctx, task, cancel_all); + ret |= io_poll_remove_all(ctx, task, cancel_all); + ret |= io_kill_timeouts(ctx, task, cancel_all); + if (task) + ret |= io_run_task_work(); if (!ret) break; - io_run_task_work(); cond_resched(); } } -static void io_disable_sqo_submit(struct io_ring_ctx *ctx) +static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) { - mutex_lock(&ctx->uring_lock); - ctx->sqo_dead = 1; - if (ctx->flags & IORING_SETUP_R_DISABLED) - io_sq_offload_start(ctx); - mutex_unlock(&ctx->uring_lock); + struct io_uring_task *tctx = current->io_uring; + struct io_tctx_node *node; + int ret; - /* make sure callers enter the ring to get error */ - if (ctx->rings) - io_ring_set_wakeup_flag(ctx); -} + if (unlikely(!tctx)) { + ret = io_uring_alloc_task_context(current, ctx); + if (unlikely(ret)) + return ret; -/* - * We need to iteratively cancel requests, in case a request has dependent - * hard links. These persist even for failure of cancelations, hence keep - * looping until none are found. 
- */ -static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, - struct files_struct *files) -{ - struct task_struct *task = current; + tctx = current->io_uring; + if (ctx->iowq_limits_set) { + unsigned int limits[2] = { ctx->iowq_limits[0], + ctx->iowq_limits[1], }; - if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { - io_disable_sqo_submit(ctx); - task = ctx->sq_data->thread; - atomic_inc(&task->io_uring->in_idle); - io_sq_thread_park(ctx->sq_data); + ret = io_wq_max_workers(tctx->io_wq, limits); + if (ret) + return ret; + } } + if (!xa_load(&tctx->xa, (unsigned long)ctx)) { + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + node->ctx = ctx; + node->task = current; - io_cancel_defer_files(ctx, task, files); - io_cqring_overflow_flush(ctx, true, task, files); + ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx, + node, GFP_KERNEL)); + if (ret) { + kfree(node); + return ret; + } - if (!files) - __io_uring_cancel_task_requests(ctx, task); - else - io_uring_cancel_files(ctx, task, files); - - if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { - atomic_dec(&task->io_uring->in_idle); - io_sq_thread_unpark(ctx->sq_data); + mutex_lock(&ctx->uring_lock); + list_add(&node->ctx_node, &ctx->tctx_list); + mutex_unlock(&ctx->uring_lock); } + tctx->last = ctx; + return 0; } /* * Note that this task has used io_uring. We use it for cancelation purposes. 
*/ -static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) +static inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx) { struct io_uring_task *tctx = current->io_uring; - int ret; - if (unlikely(!tctx)) { - ret = io_uring_alloc_task_context(current); - if (unlikely(ret)) - return ret; - tctx = current->io_uring; - } - if (tctx->last != file) { - void *old = xa_load(&tctx->xa, (unsigned long)file); - - if (!old) { - get_file(file); - ret = xa_err(xa_store(&tctx->xa, (unsigned long)file, - file, GFP_KERNEL)); - if (ret) { - fput(file); - return ret; - } - } - tctx->last = file; - } - - /* - * This is race safe in that the task itself is doing this, hence it - * cannot be going through the exit/cancel paths at the same time. - * This cannot be modified while exit/cancel is running. - */ - if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL)) - tctx->sqpoll = true; - - return 0; + if (likely(tctx && tctx->last == ctx)) + return 0; + return __io_uring_add_tctx_node(ctx); } /* * Remove this io_uring_file -> task mapping. 
*/ -static void io_uring_del_task_file(struct file *file) +static void io_uring_del_tctx_node(unsigned long index) { struct io_uring_task *tctx = current->io_uring; + struct io_tctx_node *node; - if (tctx->last == file) + if (!tctx) + return; + node = xa_erase(&tctx->xa, index); + if (!node) + return; + + WARN_ON_ONCE(current != node->task); + WARN_ON_ONCE(list_empty(&node->ctx_node)); + + mutex_lock(&node->ctx->uring_lock); + list_del(&node->ctx_node); + mutex_unlock(&node->ctx->uring_lock); + + if (tctx->last == node->ctx) tctx->last = NULL; - file = xa_erase(&tctx->xa, (unsigned long)file); - if (file) - fput(file); + kfree(node); } -static void io_uring_remove_task_files(struct io_uring_task *tctx) +static void io_uring_clean_tctx(struct io_uring_task *tctx) { - struct file *file; + struct io_wq *wq = tctx->io_wq; + struct io_tctx_node *node; unsigned long index; - xa_for_each(&tctx->xa, index, file) - io_uring_del_task_file(file); -} - -void __io_uring_files_cancel(struct files_struct *files) -{ - struct io_uring_task *tctx = current->io_uring; - struct file *file; - unsigned long index; - - /* make sure overflow events are dropped */ - atomic_inc(&tctx->in_idle); - xa_for_each(&tctx->xa, index, file) - io_uring_cancel_task_requests(file->private_data, files); - atomic_dec(&tctx->in_idle); - - if (files) - io_uring_remove_task_files(tctx); -} - -static s64 tctx_inflight(struct io_uring_task *tctx) -{ - unsigned long index; - struct file *file; - s64 inflight; - - inflight = percpu_counter_sum(&tctx->inflight); - if (!tctx->sqpoll) - return inflight; - - /* - * If we have SQPOLL rings, then we need to iterate and find them, and - * add the pending count for those. 
- */ - xa_for_each(&tctx->xa, index, file) { - struct io_ring_ctx *ctx = file->private_data; - - if (ctx->flags & IORING_SETUP_SQPOLL) { - struct io_uring_task *__tctx = ctx->sqo_task->io_uring; - - inflight += percpu_counter_sum(&__tctx->inflight); - } + xa_for_each(&tctx->xa, index, node) { + io_uring_del_tctx_node(index); + cond_resched(); } + if (wq) { + /* + * Must be after io_uring_del_task_file() (removes nodes under + * uring_lock) to avoid race with io_uring_try_cancel_iowq(). + */ + io_wq_put_and_exit(wq); + tctx->io_wq = NULL; + } +} - return inflight; +static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) +{ + if (tracked) + return atomic_read(&tctx->inflight_tracked); + return percpu_counter_sum(&tctx->inflight); } /* - * Find any io_uring fd that this task has registered or done IO on, and cancel - * requests. + * Find any io_uring ctx that this task has registered or done IO on, and cancel + * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation. 
*/ -void __io_uring_task_cancel(void) +static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) { struct io_uring_task *tctx = current->io_uring; - DEFINE_WAIT(wait); + struct io_ring_ctx *ctx; s64 inflight; + DEFINE_WAIT(wait); + + WARN_ON_ONCE(sqd && sqd->thread != current); + + if (!current->io_uring) + return; + if (tctx->io_wq) + io_wq_exit_start(tctx->io_wq); - /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); - - /* trigger io_disable_sqo_submit() */ - if (tctx->sqpoll) - __io_uring_files_cancel(NULL); - do { + io_uring_drop_tctx_refs(current); /* read completions before cancelations */ - inflight = tctx_inflight(tctx); + inflight = tctx_inflight(tctx, !cancel_all); if (!inflight) break; - __io_uring_files_cancel(NULL); - prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE); + if (!sqd) { + struct io_tctx_node *node; + unsigned long index; + + xa_for_each(&tctx->xa, index, node) { + /* sqpoll task will cancel all its requests */ + if (node->ctx->sq_data) + continue; + io_uring_try_cancel_requests(node->ctx, current, + cancel_all); + } + } else { + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) + io_uring_try_cancel_requests(ctx, current, + cancel_all); + } + + prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE); + io_run_task_work(); + io_uring_drop_tctx_refs(current); /* * If we've seen completions, retry without waiting. This * avoids a race where a completion comes in before we did * prepare_to_wait(). */ - if (inflight == tctx_inflight(tctx)) + if (inflight == tctx_inflight(tctx, !cancel_all)) schedule(); finish_wait(&tctx->wait, &wait); } while (1); - atomic_dec(&tctx->in_idle); - - io_uring_remove_task_files(tctx); + io_uring_clean_tctx(tctx); + if (cancel_all) { + /* + * We shouldn't run task_works after cancel, so just leave + * ->in_idle set for normal exit. 
+ */ + atomic_dec(&tctx->in_idle); + /* for exec all current's requests should be gone, kill tctx */ + __io_uring_free(current); + } } -static int io_uring_flush(struct file *file, void *data) +void __io_uring_cancel(bool cancel_all) { - struct io_uring_task *tctx = current->io_uring; - struct io_ring_ctx *ctx = file->private_data; - - if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) - io_uring_cancel_task_requests(ctx, NULL); - - if (!tctx) - return 0; - - /* we should have cancelled and erased it before PF_EXITING */ - WARN_ON_ONCE((current->flags & PF_EXITING) && - xa_load(&tctx->xa, (unsigned long)file)); - - /* - * fput() is pending, will be 2 if the only other ref is our potential - * task file note. If the task is exiting, drop regardless of count. - */ - if (atomic_long_read(&file->f_count) != 2) - return 0; - - if (ctx->flags & IORING_SETUP_SQPOLL) { - /* there is only one file note, which is owned by sqo_task */ - WARN_ON_ONCE(ctx->sqo_task != current && - xa_load(&tctx->xa, (unsigned long)file)); - /* sqo_dead check is for when this happens after cancellation */ - WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead && - !xa_load(&tctx->xa, (unsigned long)file)); - - io_disable_sqo_submit(ctx); - } - - if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current) - io_uring_del_task_file(file); - return 0; + io_uring_cancel_generic(cancel_all, NULL); } static void *io_uring_validate_mmap_request(struct file *file, @@ -9062,60 +9801,84 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file, static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) { - int ret = 0; DEFINE_WAIT(wait); do { if (!io_sqring_full(ctx)) break; - prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); - if (unlikely(ctx->sqo_dead)) { - ret = -EOWNERDEAD; - break; - } - if (!io_sqring_full(ctx)) break; - schedule(); } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); - return ret; + return 0; +} + +static 
int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, + struct __kernel_timespec __user **ts, + const sigset_t __user **sig) +{ + struct io_uring_getevents_arg arg; + + /* + * If EXT_ARG isn't set, then we have no timespec and the argp pointer + * is just a pointer to the sigset_t. + */ + if (!(flags & IORING_ENTER_EXT_ARG)) { + *sig = (const sigset_t __user *) argp; + *ts = NULL; + return 0; + } + + /* + * EXT_ARG is set - ensure we agree on the size of it and copy in our + * timespec and sigset_t pointers if good. + */ + if (*argsz != sizeof(arg)) + return -EINVAL; + if (copy_from_user(&arg, argp, sizeof(arg))) + return -EFAULT; + if (arg.pad) + return -EINVAL; + *sig = u64_to_user_ptr(arg.sigmask); + *argsz = arg.sigmask_sz; + *ts = u64_to_user_ptr(arg.ts); + return 0; } SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, - u32, min_complete, u32, flags, const sigset_t __user *, sig, - size_t, sigsz) + u32, min_complete, u32, flags, const void __user *, argp, + size_t, argsz) { struct io_ring_ctx *ctx; - long ret = -EBADF; int submitted = 0; struct fd f; + long ret; io_run_task_work(); - if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP | - IORING_ENTER_SQ_WAIT)) + if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP | + IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG))) return -EINVAL; f = fdget(fd); - if (!f.file) + if (unlikely(!f.file)) return -EBADF; ret = -EOPNOTSUPP; - if (f.file->f_op != &io_uring_fops) + if (unlikely(f.file->f_op != &io_uring_fops)) goto out_fput; ret = -ENXIO; ctx = f.file->private_data; - if (!percpu_ref_tryget(&ctx->refs)) + if (unlikely(!percpu_ref_tryget(&ctx->refs))) goto out_fput; ret = -EBADFD; - if (ctx->flags & IORING_SETUP_R_DISABLED) + if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED)) goto out; /* @@ -9125,9 +9888,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, */ ret = 0; if (ctx->flags & IORING_SETUP_SQPOLL) { - 
io_cqring_overflow_flush(ctx, false, NULL, NULL); + io_cqring_overflow_flush(ctx); - if (unlikely(ctx->sqo_dead)) { + if (unlikely(ctx->sq_data->thread == NULL)) { ret = -EOWNERDEAD; goto out; } @@ -9140,7 +9903,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, } submitted = to_submit; } else if (to_submit) { - ret = io_uring_add_task_file(ctx, f.file); + ret = io_uring_add_tctx_node(ctx); if (unlikely(ret)) goto out; mutex_lock(&ctx->uring_lock); @@ -9151,6 +9914,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, goto out; } if (flags & IORING_ENTER_GETEVENTS) { + const sigset_t __user *sig; + struct __kernel_timespec __user *ts; + + ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig); + if (unlikely(ret)) + goto out; + min_complete = min(min_complete, ctx->cq_entries); /* @@ -9163,7 +9933,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, !(ctx->flags & IORING_SETUP_SQPOLL)) { ret = io_iopoll_check(ctx, min_complete); } else { - ret = io_cqring_wait(ctx, min_complete, sig, sigsz); + ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts); } } @@ -9176,9 +9946,8 @@ out_fput: #ifdef CONFIG_PROC_FS static int io_uring_show_cred(struct seq_file *m, unsigned int id, - const struct io_identity *iod) + const struct cred *cred) { - const struct cred *cred = iod->creds; struct user_namespace *uns = seq_user_ns(m); struct group_info *gi; kernel_cap_t cap; @@ -9222,18 +9991,18 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) */ has_lock = mutex_trylock(&ctx->uring_lock); - if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) + if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { sq = ctx->sq_data; + if (!sq->thread) + sq = NULL; + } seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1); seq_printf(m, "SqThreadCpu:\t%d\n", sq ? 
task_cpu(sq->thread) : -1); seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); for (i = 0; has_lock && i < ctx->nr_user_files; i++) { - struct fixed_file_table *table; - struct file *f; + struct file *f = io_file_from_index(ctx, i); - table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT]; - f = table->files[i & IORING_FILE_TABLE_MASK]; if (f) seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); else @@ -9241,21 +10010,21 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) } seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { - struct io_mapped_ubuf *buf = &ctx->user_bufs[i]; + struct io_mapped_ubuf *buf = ctx->user_bufs[i]; + unsigned int len = buf->ubuf_end - buf->ubuf; - seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, - (unsigned int) buf->len); + seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len); } if (has_lock && !xa_empty(&ctx->personalities)) { unsigned long index; - const struct io_identity *iod; + const struct cred *cred; seq_printf(m, "Personalities:\n"); - xa_for_each(&ctx->personalities, index, iod) - io_uring_show_cred(m, index, iod); + xa_for_each(&ctx->personalities, index, cred) + io_uring_show_cred(m, index, cred); } seq_printf(m, "PollList:\n"); - spin_lock_irq(&ctx->completion_lock); + spin_lock(&ctx->completion_lock); for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { struct hlist_head *list = &ctx->cancel_hash[i]; struct io_kiocb *req; @@ -9264,7 +10033,7 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) seq_printf(m, " op=%d, task_works=%d\n", req->opcode, req->task->task_works != NULL); } - spin_unlock_irq(&ctx->completion_lock); + spin_unlock(&ctx->completion_lock); if (has_lock) mutex_unlock(&ctx->uring_lock); } @@ -9282,14 +10051,12 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) static const struct file_operations io_uring_fops = { .release = io_uring_release, - .flush = 
io_uring_flush, .mmap = io_uring_mmap, #ifndef CONFIG_MMU .get_unmapped_area = io_uring_nommu_get_unmapped_area, .mmap_capabilities = io_uring_nommu_mmap_capabilities, #endif .poll = io_uring_poll, - .fasync = io_uring_fasync, #ifdef CONFIG_PROC_FS .show_fdinfo = io_uring_show_fdinfo, #endif @@ -9319,8 +10086,6 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, rings->cq_ring_mask = p->cq_entries - 1; rings->sq_ring_entries = p->sq_entries; rings->cq_ring_entries = p->cq_entries; - ctx->sq_mask = rings->sq_ring_mask; - ctx->cq_mask = rings->cq_ring_mask; size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); if (size == SIZE_MAX) { @@ -9347,7 +10112,7 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) if (fd < 0) return fd; - ret = io_uring_add_task_file(ctx, file); + ret = io_uring_add_tctx_node(ctx); if (ret) { put_unused_fd(fd); return ret; @@ -9390,10 +10155,8 @@ static struct file *io_uring_get_file(struct io_ring_ctx *ctx) static int io_uring_create(unsigned entries, struct io_uring_params *p, struct io_uring_params __user *params) { - struct user_struct *user = NULL; struct io_ring_ctx *ctx; struct file *file; - bool limit_mem; int ret; if (!entries) @@ -9433,34 +10196,12 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, p->cq_entries = 2 * p->sq_entries; } - user = get_uid(current_user()); - limit_mem = !capable(CAP_IPC_LOCK); - - if (limit_mem) { - ret = __io_account_mem(user, - ring_pages(p->sq_entries, p->cq_entries)); - if (ret) { - free_uid(user); - return ret; - } - } - ctx = io_ring_ctx_alloc(p); - if (!ctx) { - if (limit_mem) - __io_unaccount_mem(user, ring_pages(p->sq_entries, - p->cq_entries)); - free_uid(user); + if (!ctx) return -ENOMEM; - } ctx->compat = in_compat_syscall(); - ctx->user = user; - ctx->creds = get_current_cred(); -#ifdef CONFIG_AUDIT - ctx->loginuid = current->loginuid; - ctx->sessionid = current->sessionid; -#endif - ctx->sqo_task = get_task_struct(current); 
+ if (!capable(CAP_IPC_LOCK)) + ctx->user = get_uid(current_user()); /* * This is just grabbed for accounting purposes. When a process exits, @@ -9471,35 +10212,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, mmgrab(current->mm); ctx->mm_account = current->mm; -#ifdef CONFIG_BLK_CGROUP - /* - * The sq thread will belong to the original cgroup it was inited in. - * If the cgroup goes offline (e.g. disabling the io controller), then - * issued bios will be associated with the closest cgroup later in the - * block layer. - */ - rcu_read_lock(); - ctx->sqo_blkcg_css = blkcg_css(); - ret = css_tryget_online(ctx->sqo_blkcg_css); - rcu_read_unlock(); - if (!ret) { - /* don't init against a dying cgroup, have the user try again */ - ctx->sqo_blkcg_css = NULL; - ret = -ENODEV; - goto err; - } -#endif - - /* - * Account memory _before_ installing the file descriptor. Once - * the descriptor is installed, it can get closed at any time. Also - * do this before hitting the general error path, as ring freeing - * will un-account as well. 
- */ - io_account_mem(ctx, ring_pages(p->sq_entries, p->cq_entries), - ACCT_LOCKED); - ctx->limit_mem = limit_mem; - ret = io_allocate_scq_urings(ctx, p); if (ret) goto err; @@ -9507,9 +10219,11 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, ret = io_sq_offload_create(ctx, p); if (ret) goto err; - - if (!(p->flags & IORING_SETUP_R_DISABLED)) - io_sq_offload_start(ctx); + /* always set a rsrc node */ + ret = io_rsrc_node_switch_start(ctx); + if (ret) + goto err; + io_rsrc_node_switch(ctx, NULL); memset(&p->sq_off, 0, sizeof(p->sq_off)); p->sq_off.head = offsetof(struct io_rings, sq.head); @@ -9532,7 +10246,9 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | - IORING_FEAT_POLL_32BITS; + IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | + IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | + IORING_FEAT_RSRC_TAGS; if (copy_to_user(params, p, sizeof(*p))) { ret = -EFAULT; @@ -9551,7 +10267,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, */ ret = io_uring_install_fd(ctx, file); if (ret < 0) { - io_disable_sqo_submit(ctx); /* fput will clean it up */ fput(file); return ret; @@ -9560,7 +10275,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: - io_disable_sqo_submit(ctx); io_ring_ctx_wait_and_kill(ctx); return ret; } @@ -9638,22 +10352,16 @@ out: static int io_register_personality(struct io_ring_ctx *ctx) { - struct io_identity *iod; + const struct cred *creds; u32 id; int ret; - iod = kmalloc(sizeof(*iod), GFP_KERNEL); - if (unlikely(!iod)) - return -ENOMEM; + creds = get_current_cred(); - io_init_identity(iod); - iod->creds = get_current_cred(); - - ret = xa_alloc_cyclic(&ctx->personalities, &id, (void 
*)iod, + ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds, XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); if (ret < 0) { - put_cred(iod->creds); - kfree(iod); + put_cred(creds); return ret; } return id; @@ -9737,24 +10445,273 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx) if (ctx->restrictions.registered) ctx->restricted = 1; - io_sq_offload_start(ctx); + ctx->flags &= ~IORING_SETUP_R_DISABLED; + if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait)) + wake_up(&ctx->sq_data->wait); return 0; } +static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, + struct io_uring_rsrc_update2 *up, + unsigned nr_args) +{ + __u32 tmp; + int err; + + if (check_add_overflow(up->offset, nr_args, &tmp)) + return -EOVERFLOW; + err = io_rsrc_node_switch_start(ctx); + if (err) + return err; + + switch (type) { + case IORING_RSRC_FILE: + return __io_sqe_files_update(ctx, up, nr_args); + case IORING_RSRC_BUFFER: + return __io_sqe_buffers_update(ctx, up, nr_args); + } + return -EINVAL; +} + +static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, + unsigned nr_args) +{ + struct io_uring_rsrc_update2 up; + + if (!nr_args) + return -EINVAL; + memset(&up, 0, sizeof(up)); + if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) + return -EFAULT; + if (up.resv || up.resv2) + return -EINVAL; + return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); +} + +static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, + unsigned size, unsigned type) +{ + struct io_uring_rsrc_update2 up; + + if (size != sizeof(up)) + return -EINVAL; + if (copy_from_user(&up, arg, sizeof(up))) + return -EFAULT; + if (!up.nr || up.resv || up.resv2) + return -EINVAL; + return __io_register_rsrc_update(ctx, type, &up, up.nr); +} + +static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, + unsigned int size, unsigned int type) +{ + struct io_uring_rsrc_register rr; + + /* keep it 
extendible */ + if (size != sizeof(rr)) + return -EINVAL; + + memset(&rr, 0, sizeof(rr)); + if (copy_from_user(&rr, arg, size)) + return -EFAULT; + if (!rr.nr || rr.resv || rr.resv2) + return -EINVAL; + + switch (type) { + case IORING_RSRC_FILE: + return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data), + rr.nr, u64_to_user_ptr(rr.tags)); + case IORING_RSRC_BUFFER: + return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data), + rr.nr, u64_to_user_ptr(rr.tags)); + } + return -EINVAL; +} + +static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg, + unsigned len) +{ + struct io_uring_task *tctx = current->io_uring; + cpumask_var_t new_mask; + int ret; + + if (!tctx || !tctx->io_wq) + return -EINVAL; + + if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_clear(new_mask); + if (len > cpumask_size()) + len = cpumask_size(); + +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) { + ret = compat_get_bitmap(cpumask_bits(new_mask), + (const compat_ulong_t __user *)arg, + len * 8 /* CHAR_BIT */); + } else { + ret = copy_from_user(new_mask, arg, len); + } +#else + ret = copy_from_user(new_mask, arg, len); +#endif + + if (ret) { + free_cpumask_var(new_mask); + return -EFAULT; + } + + ret = io_wq_cpu_affinity(tctx->io_wq, new_mask); + free_cpumask_var(new_mask); + return ret; +} + +static int io_unregister_iowq_aff(struct io_ring_ctx *ctx) +{ + struct io_uring_task *tctx = current->io_uring; + + if (!tctx || !tctx->io_wq) + return -EINVAL; + + return io_wq_cpu_affinity(tctx->io_wq, NULL); +} + +static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, + void __user *arg) + __must_hold(&ctx->uring_lock) +{ + struct io_tctx_node *node; + struct io_uring_task *tctx = NULL; + struct io_sq_data *sqd = NULL; + __u32 new_count[2]; + int i, ret; + + if (copy_from_user(new_count, arg, sizeof(new_count))) + return -EFAULT; + for (i = 0; i < ARRAY_SIZE(new_count); i++) + if (new_count[i] > INT_MAX) + return -EINVAL; + + if (ctx->flags & 
IORING_SETUP_SQPOLL) { + sqd = ctx->sq_data; + if (sqd) { + /* + * Observe the correct sqd->lock -> ctx->uring_lock + * ordering. Fine to drop uring_lock here, we hold + * a ref to the ctx. + */ + refcount_inc(&sqd->refs); + mutex_unlock(&ctx->uring_lock); + mutex_lock(&sqd->lock); + mutex_lock(&ctx->uring_lock); + if (sqd->thread) + tctx = sqd->thread->io_uring; + } + } else { + tctx = current->io_uring; + } + + BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); + + for (i = 0; i < ARRAY_SIZE(new_count); i++) + if (new_count[i]) + ctx->iowq_limits[i] = new_count[i]; + ctx->iowq_limits_set = true; + + ret = -EINVAL; + if (tctx && tctx->io_wq) { + ret = io_wq_max_workers(tctx->io_wq, new_count); + if (ret) + goto err; + } else { + memset(new_count, 0, sizeof(new_count)); + } + + if (sqd) { + mutex_unlock(&sqd->lock); + io_put_sq_data(sqd); + } + + if (copy_to_user(arg, new_count, sizeof(new_count))) + return -EFAULT; + + /* that's it for SQPOLL, only the SQPOLL task creates requests */ + if (sqd) + return 0; + + /* now propagate the restriction to all registered users */ + list_for_each_entry(node, &ctx->tctx_list, ctx_node) { + struct io_uring_task *tctx = node->task->io_uring; + + if (WARN_ON_ONCE(!tctx->io_wq)) + continue; + + for (i = 0; i < ARRAY_SIZE(new_count); i++) + new_count[i] = ctx->iowq_limits[i]; + /* ignore errors, it always returns zero anyway */ + (void)io_wq_max_workers(tctx->io_wq, new_count); + } + return 0; +err: + if (sqd) { + mutex_unlock(&sqd->lock); + io_put_sq_data(sqd); + } + return ret; +} + static bool io_register_op_must_quiesce(int op) { switch (op) { + case IORING_REGISTER_BUFFERS: + case IORING_UNREGISTER_BUFFERS: + case IORING_REGISTER_FILES: case IORING_UNREGISTER_FILES: case IORING_REGISTER_FILES_UPDATE: case IORING_REGISTER_PROBE: case IORING_REGISTER_PERSONALITY: case IORING_UNREGISTER_PERSONALITY: + case IORING_REGISTER_FILES2: + case IORING_REGISTER_FILES_UPDATE2: + case IORING_REGISTER_BUFFERS2: + case 
IORING_REGISTER_BUFFERS_UPDATE: + case IORING_REGISTER_IOWQ_AFF: + case IORING_UNREGISTER_IOWQ_AFF: + case IORING_REGISTER_IOWQ_MAX_WORKERS: return false; default: return true; } } +static int io_ctx_quiesce(struct io_ring_ctx *ctx) +{ + long ret; + + percpu_ref_kill(&ctx->refs); + + /* + * Drop uring mutex before waiting for references to exit. If another + * thread is currently inside io_uring_enter() it might need to grab the + * uring_lock to make progress. If we hold it here across the drain + * wait, then we can deadlock. It's safe to drop the mutex here, since + * no new references will come in after we've killed the percpu ref. + */ + mutex_unlock(&ctx->uring_lock); + do { + ret = wait_for_completion_interruptible(&ctx->ref_comp); + if (!ret) + break; + ret = io_run_task_work_sig(); + } while (ret >= 0); + mutex_lock(&ctx->uring_lock); + + if (ret) + io_refs_resurrect(&ctx->refs, &ctx->ref_comp); + return ret; +} + static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, void __user *arg, unsigned nr_args) __releases(ctx->uring_lock) @@ -9770,58 +10727,32 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, if (percpu_ref_is_dying(&ctx->refs)) return -ENXIO; - if (io_register_op_must_quiesce(opcode)) { - percpu_ref_kill(&ctx->refs); - - /* - * Drop uring mutex before waiting for references to exit. If - * another thread is currently inside io_uring_enter() it might - * need to grab the uring_lock to make progress. If we hold it - * here across the drain wait, then we can deadlock. It's safe - * to drop the mutex here, since no new references will come in - * after we've killed the percpu ref. 
- */ - mutex_unlock(&ctx->uring_lock); - do { - ret = wait_for_completion_interruptible(&ctx->ref_comp); - if (!ret) - break; - ret = io_run_task_work_sig(); - if (ret < 0) - break; - } while (1); - mutex_lock(&ctx->uring_lock); - - if (ret) { - io_refs_resurrect(&ctx->refs, &ctx->ref_comp); - return ret; - } + if (ctx->restricted) { + if (opcode >= IORING_REGISTER_LAST) + return -EINVAL; + opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); + if (!test_bit(opcode, ctx->restrictions.register_op)) + return -EACCES; } - if (ctx->restricted) { - if (opcode >= IORING_REGISTER_LAST) { - ret = -EINVAL; - goto out; - } - - if (!test_bit(opcode, ctx->restrictions.register_op)) { - ret = -EACCES; - goto out; - } + if (io_register_op_must_quiesce(opcode)) { + ret = io_ctx_quiesce(ctx); + if (ret) + return ret; } switch (opcode) { case IORING_REGISTER_BUFFERS: - ret = io_sqe_buffer_register(ctx, arg, nr_args); + ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL); break; case IORING_UNREGISTER_BUFFERS: ret = -EINVAL; if (arg || nr_args) break; - ret = io_sqe_buffer_unregister(ctx); + ret = io_sqe_buffers_unregister(ctx); break; case IORING_REGISTER_FILES: - ret = io_sqe_files_register(ctx, arg, nr_args); + ret = io_sqe_files_register(ctx, arg, nr_args, NULL); break; case IORING_UNREGISTER_FILES: ret = -EINVAL; @@ -9830,7 +10761,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, ret = io_sqe_files_unregister(ctx); break; case IORING_REGISTER_FILES_UPDATE: - ret = io_sqe_files_update(ctx, arg, nr_args); + ret = io_register_files_update(ctx, arg, nr_args); break; case IORING_REGISTER_EVENTFD: case IORING_REGISTER_EVENTFD_ASYNC: @@ -9878,12 +10809,43 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, case IORING_REGISTER_RESTRICTIONS: ret = io_register_restrictions(ctx, arg, nr_args); break; + case IORING_REGISTER_FILES2: + ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE); + break; + case 
IORING_REGISTER_FILES_UPDATE2: + ret = io_register_rsrc_update(ctx, arg, nr_args, + IORING_RSRC_FILE); + break; + case IORING_REGISTER_BUFFERS2: + ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER); + break; + case IORING_REGISTER_BUFFERS_UPDATE: + ret = io_register_rsrc_update(ctx, arg, nr_args, + IORING_RSRC_BUFFER); + break; + case IORING_REGISTER_IOWQ_AFF: + ret = -EINVAL; + if (!arg || !nr_args) + break; + ret = io_register_iowq_aff(ctx, arg, nr_args); + break; + case IORING_UNREGISTER_IOWQ_AFF: + ret = -EINVAL; + if (arg || nr_args) + break; + ret = io_unregister_iowq_aff(ctx); + break; + case IORING_REGISTER_IOWQ_MAX_WORKERS: + ret = -EINVAL; + if (!arg || nr_args != 2) + break; + ret = io_register_iowq_max_workers(ctx, arg); + break; default: ret = -EINVAL; break; } -out: if (io_register_op_must_quiesce(opcode)) { /* bring the ctx back to life */ percpu_ref_reinit(&ctx->refs); @@ -9909,6 +10871,8 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, ctx = f.file->private_data; + io_run_task_work(); + mutex_lock(&ctx->uring_lock); ret = __io_uring_register(ctx, opcode, arg, nr_args); mutex_unlock(&ctx->uring_lock); @@ -9955,12 +10919,27 @@ static int __init io_uring_init(void) BUILD_BUG_SQE_ELEM(28, __u32, splice_flags); BUILD_BUG_SQE_ELEM(32, __u64, user_data); BUILD_BUG_SQE_ELEM(40, __u16, buf_index); + BUILD_BUG_SQE_ELEM(40, __u16, buf_group); BUILD_BUG_SQE_ELEM(42, __u16, personality); BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); + BUILD_BUG_SQE_ELEM(44, __u32, file_index); + + BUILD_BUG_ON(sizeof(struct io_uring_files_update) != + sizeof(struct io_uring_rsrc_update)); + BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) > + sizeof(struct io_uring_rsrc_update2)); + + /* ->buf_index is u16 */ + BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16)); + + /* should fit into one byte */ + BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); - BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * 
sizeof(int)); - req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC); + BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int)); + + req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | + SLAB_ACCOUNT); return 0; }; __initcall(io_uring_init); diff --git a/kernel/exit.c b/kernel/exit.c index 1410b68327a4..86e403191d47 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -765,7 +765,7 @@ void __noreturn do_exit(long code) schedule(); } - io_uring_files_cancel(tsk->files); + io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ /* sync mm's RSS info before statistics gathering */ diff --git a/kernel/fork.c b/kernel/fork.c index e0baadf90aed..f73e3e694a6d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -961,6 +961,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->splice_pipe = NULL; tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; + tsk->pf_io_worker = NULL; account_kernel_stack(tsk, 1); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fcfe696806b5..6e29eab94be6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -21,7 +21,7 @@ #include #include "../workqueue_internal.h" -#include "../../fs/io-wq.h" +#include "../../io_uring/io-wq.h" #include "../smpboot.h" #include "pelt.h" From 62822bf6305aa2ee532b711c330bb4226ed4cedc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 16:03:01 -0600 Subject: [PATCH 1428/1477] UPSTREAM: signal: kill JOBCTL_TASK_WORK [ Upstream commit 98b89b649fce39dacb9dc036d6d0fdb8caff73f7 ] It's no longer used, get rid of it. 
Change-Id: Id14379554f3e1085c63ac4d044618f609ebc2f9f Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c91ab04781f9e46c1a2143bd1ba8fc1f1aff6ebc) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- include/linux/sched/jobctl.h | 4 +--- kernel/signal.c | 20 -------------------- 2 files changed, 1 insertion(+), 23 deletions(-) diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index d2b4204ba4d3..fa067de9f1a9 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -19,7 +19,6 @@ struct task_struct; #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ -#define JOBCTL_TASK_WORK_BIT 24 /* set by TWA_SIGNAL */ #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) @@ -29,10 +28,9 @@ struct task_struct; #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) -#define JOBCTL_TASK_WORK (1UL << JOBCTL_TASK_WORK_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask); extern void task_clear_jobctl_trapping(struct task_struct *task); diff --git a/kernel/signal.c b/kernel/signal.c index a8499e105c30..147804cda8ba 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2576,26 +2576,6 @@ bool get_signal(struct ksignal *ksig) relock: spin_lock_irq(&sighand->siglock); - /* - * Make sure we can safely read ->jobctl() in task_work add. As Oleg - * states: - * - * It pairs with mb (implied by cmpxchg) before READ_ONCE. 
So we - * roughly have - * - * task_work_add: get_signal: - * STORE(task->task_works, new_work); STORE(task->jobctl); - * mb(); mb(); - * LOAD(task->jobctl); LOAD(task->task_works); - * - * and we can rely on STORE-MB-LOAD [ in task_work_add]. - */ - smp_store_mb(current->jobctl, current->jobctl & ~JOBCTL_TASK_WORK); - if (unlikely(current->task_works)) { - spin_unlock_irq(&sighand->siglock); - task_work_run(); - goto relock; - } /* * Every stopped thread goes here after wakeup. Check to see if From d10f30da0ddc55547d6c3979c2f48c106ecbdd14 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 5 Jan 2021 11:32:43 -0700 Subject: [PATCH 1429/1477] UPSTREAM: task_work: unconditionally run task_work from get_signal() [ Upstream commit 35d0b389f3b23439ad15b610d6e43fc72fc75779 ] Song reported a boot regression in a kvm image with 5.11-rc, and bisected it down to the below patch. Debugging this issue, turns out that the boot stalled when a task is waiting on a pipe being released. As we no longer run task_work from get_signal() unless it's queued with TWA_SIGNAL, the task goes idle without running the task_work. This prevents ->release() from being called on the pipe, which another boot task is waiting on. For now, re-instate the unconditional task_work run from get_signal(). For 5.12, we'll collapse TWA_RESUME and TWA_SIGNAL, as it no longer makes sense to have a distinction between the two. This will turn task_work notification into a simple boolean, whether to notify or not. 
Fixes: 98b89b649fce ("signal: kill JOBCTL_TASK_WORK") Reported-by: Song Liu Tested-by: John Stultz Tested-by: Douglas Anderson Tested-by: Sedat Dilek # LLVM/Clang version 11.0.1 Change-Id: Id5ce292120cafff9ede9bb7421cde3aaf4e56924 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 6ef2b4728a00c98cc1163212443e39d79a4c2c94) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index 147804cda8ba..f6ecd01311d6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2552,6 +2552,9 @@ bool get_signal(struct ksignal *ksig) struct signal_struct *signal = current->signal; int signr; + if (unlikely(current->task_works)) + task_work_run(); + /* * For non-generic architectures, check for TIF_NOTIFY_SIGNAL so * that the arch handlers don't all have to do it. If we get here From 4b17dea7862e917dba51b17e8e102f0d6a169221 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 17 Feb 2021 10:14:21 -0700 Subject: [PATCH 1430/1477] UPSTREAM: net: remove cmsg restriction from io_uring based send/recvmsg calls [ Upstream commit e54937963fa249595824439dc839c948188dea83 ] No need to restrict these anymore, as the worker threads are direct clones of the original task. Hence we know for a fact that we can support anything that the regular task can. Since the only user of proto_ops->flags was to flag PROTO_CMSG_DATA_ONLY, kill the member and the flag definition too. 
Change-Id: Ie87e4ff3c621cf53a8e9589a7689e62d759de983 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a3025359ffa707b484b23f4df37425f36e6955fd) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- include/linux/net.h | 3 --- net/ipv4/af_inet.c | 1 - net/ipv6/af_inet6.c | 1 - net/socket.c | 10 ---------- 4 files changed, 15 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index f5569faf930b..3414d63a52f4 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -43,8 +43,6 @@ struct net; #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 -#define PROTO_CMSG_DATA_ONLY 0x0001 - #ifndef ARCH_HAS_SOCKET_TYPES /** * enum sock_type - Socket types @@ -139,7 +137,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, struct proto_ops { int family; - unsigned int flags; struct module *owner; int (*release) (struct socket *sock); int (*bind) (struct socket *sock, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2970555dd77c..b9f4fe8c14b8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1021,7 +1021,6 @@ static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon const struct proto_ops inet_stream_ops = { .family = PF_INET, - .flags = PROTO_CMSG_DATA_ONLY, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3cb5120b1c4a..bf3dfe2d12cb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -663,7 +663,6 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, const struct proto_ops inet6_stream_ops = { .family = PF_INET6, - .flags = PROTO_CMSG_DATA_ONLY, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, diff --git a/net/socket.c b/net/socket.c index 16800b0ae2d3..938ab3a89707 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2420,10 +2420,6 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, long 
__sys_sendmsg_sock(struct socket *sock, struct msghdr *msg, unsigned int flags) { - /* disallow ancillary data requests from this path */ - if (msg->msg_control || msg->msg_controllen) - return -EINVAL; - return ____sys_sendmsg(sock, msg, flags, NULL, 0); } @@ -2632,12 +2628,6 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, struct user_msghdr __user *umsg, struct sockaddr __user *uaddr, unsigned int flags) { - if (msg->msg_control || msg->msg_controllen) { - /* disallow ancillary data reqs unless cmsg is plain data */ - if (!(sock->ops->flags & PROTO_CMSG_DATA_ONLY)) - return -EINVAL; - } - return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0); } From 498b35b3c40720419694dc1d50c4faf231995141 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Feb 2021 13:42:18 -0700 Subject: [PATCH 1431/1477] UPSTREAM: Revert "proc: don't allow async path resolution of /proc/thread-self components" [ Upstream commit 2587890b5e2892dfecaa5e5126bdac8076a4e6f7 ] This reverts commit 0d4370cfe36b7f1719123b621a4ec4d9c7a25f89. No longer needed, as the io-wq worker threads have the right identity. Change-Id: I7a28e02a0a1911555853cf4046e3a09c7e36d4a2 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 87cb08dc6b4f42a491e582d3ee79c30893681193) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/proc/self.c | 2 +- fs/proc/thread_self.c | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/proc/self.c b/fs/proc/self.c index a4012154e109..cc71ce3466dc 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -20,7 +20,7 @@ static const char *proc_self_get_link(struct dentry *dentry, * Not currently supported. Once we can inherit all of struct pid, * we can allow this. 
*/ - if (current->flags & PF_IO_WORKER) + if (current->flags & PF_KTHREAD) return ERR_PTR(-EOPNOTSUPP); if (!tgid) diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index d56681d86d28..a553273fbd41 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -17,13 +17,6 @@ static const char *proc_thread_self_get_link(struct dentry *dentry, pid_t pid = task_pid_nr_ns(current, ns); char *name; - /* - * Not currently supported. Once we can inherit all of struct pid, - * we can allow this. - */ - if (current->flags & PF_IO_WORKER) - return ERR_PTR(-EOPNOTSUPP); - if (!pid) return ERR_PTR(-ENOENT); name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC); From 7c9f38c09b6a6839a8c6786fd7c6838b20f71523 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Feb 2021 13:42:42 -0700 Subject: [PATCH 1432/1477] UPSTREAM: Revert "proc: don't allow async path resolution of /proc/self components" [ Upstream commit 9e8d9e829c2142cf1d7756e9ed2e0b4c7569d84c ] This reverts commit 8d4c3e76e3be11a64df95ddee52e99092d42fc19. No longer needed, as the io-wq worker threads have the right identity. Change-Id: I6c12f6f957e1c789f4fd5b21379d167f17feb3ea Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b76c5373f084c802964e751efb825934a9bbbbec) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/proc/self.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/proc/self.c b/fs/proc/self.c index cc71ce3466dc..72cd69bcaf4a 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -16,13 +16,6 @@ static const char *proc_self_get_link(struct dentry *dentry, pid_t tgid = task_tgid_nr_ns(current, ns); char *name; - /* - * Not currently supported. Once we can inherit all of struct pid, - * we can allow this. 
- */ - if (current->flags & PF_KTHREAD) - return ERR_PTR(-EOPNOTSUPP); - if (!tgid) return ERR_PTR(-ENOENT); /* max length of unsigned int in decimal + NULL term */ From d7a47b29d53996637f79426cd4ac7bad247646af Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 20 Nov 2022 10:10:53 -0700 Subject: [PATCH 1433/1477] UPSTREAM: eventpoll: add EPOLL_URING_WAKE poll wakeup flag [ Upstream commit caf1aeaffc3b09649a56769e559333ae2c4f1802 ] We can have dependencies between epoll and io_uring. Consider an epoll context, identified by the epfd file descriptor, and an io_uring file descriptor identified by iofd. If we add iofd to the epfd context, and arm a multishot poll request for epfd with iofd, then the multishot poll request will repeatedly trigger and generate events until terminated by CQ ring overflow. This isn't a desired behavior. Add EPOLL_URING_WAKE so that io_uring can pass it in as part of the poll wakeup key, and io_uring can check for that to detect a potential recursive invocation. Cc: stable@vger.kernel.org # 6.0 Change-Id: Ifafcb236b2cfe3ca3e7254a0155625fce00fd038 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2f093775022b45253b0d8df0e7296e7bc0d13a7b) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/eventpoll.c | 18 ++++++++++-------- include/uapi/linux/eventpoll.h | 6 ++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0ceb8285bf66..2c67c59814e1 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -551,7 +551,8 @@ out_unlock: */ #ifdef CONFIG_DEBUG_LOCK_ALLOC -static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) +static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, + unsigned pollflags) { struct eventpoll *ep_src; unsigned long flags; @@ -582,16 +583,17 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) } spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests); ep->nests = nests + 1; - 
wake_up_locked_poll(&ep->poll_wait, EPOLLIN); + wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags); ep->nests = 0; spin_unlock_irqrestore(&ep->poll_wait.lock, flags); } #else -static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) +static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, + unsigned pollflags) { - wake_up_poll(&ep->poll_wait, EPOLLIN); + wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); } #endif @@ -818,7 +820,7 @@ static void ep_free(struct eventpoll *ep) /* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) - ep_poll_safewake(ep, NULL); + ep_poll_safewake(ep, NULL, 0); /* * We need to lock this because we could be hit by @@ -1287,7 +1289,7 @@ out_unlock: /* We have to call this outside the lock */ if (pwake) - ep_poll_safewake(ep, epi); + ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE); if (!(epi->event.events & EPOLLEXCLUSIVE)) ewake = 1; @@ -1597,7 +1599,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, /* We have to call this outside the lock */ if (pwake) - ep_poll_safewake(ep, NULL); + ep_poll_safewake(ep, NULL, 0); return 0; @@ -1700,7 +1702,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, /* We have to call this outside the lock */ if (pwake) - ep_poll_safewake(ep, NULL); + ep_poll_safewake(ep, NULL, 0); return 0; } diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 8a3432d0f0dc..e687658843b1 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -41,6 +41,12 @@ #define EPOLLMSG (__force __poll_t)0x00000400 #define EPOLLRDHUP (__force __poll_t)0x00002000 +/* + * Internal flag - wakeup generated by io_uring, used to detect recursion back + * into the io_uring poll handler. 
+ */ +#define EPOLL_URING_WAKE ((__force __poll_t)(1U << 27)) + /* Set exclusive wakeup mode for the target file descriptor */ #define EPOLLEXCLUSIVE ((__force __poll_t)(1U << 28)) From 72d1c48675565b16cdf0d09f558370c2bfe9b3b5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 20 Nov 2022 10:13:44 -0700 Subject: [PATCH 1434/1477] UPSTREAM: eventfd: provide a eventfd_signal_mask() helper [ Upstream commit 03e02acda8e267a8183e1e0ed289ff1ef9cd7ed8 ] This is identical to eventfd_signal(), but it allows the caller to pass in a mask to be used for the poll wakeup key. The use case is avoiding repeated multishot triggers if we have a dependency between eventfd and io_uring. If we setup an eventfd context and register that as the io_uring eventfd, and at the same time queue a multishot poll request for the eventfd context, then any CQE posted will repeatedly trigger the multishot request until it terminates when the CQ ring overflows. In preparation for io_uring detecting this circular dependency, add the mentioned helper so that io_uring can pass in EPOLL_URING_WAKE as part of the poll wakeup key. Cc: stable@vger.kernel.org # 6.0 [axboe: fold in !CONFIG_EVENTFD fix from Zhang Qilong] Change-Id: I0c38a56887777f85cb10673b7ca3b5ca4d70c61b Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 4ef66581d7fdbd9bfe0b399c1a2479f8ea5cfdf0) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- fs/eventfd.c | 37 +++++++++++++++++++++---------------- include/linux/eventfd.h | 7 +++++++ 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/fs/eventfd.c b/fs/eventfd.c index df466ef81ddd..4a14295cffe0 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -45,21 +45,7 @@ struct eventfd_ctx { int id; }; -/** - * eventfd_signal - Adds @n to the eventfd counter. - * @ctx: [in] Pointer to the eventfd context. - * @n: [in] Value of the counter to be added to the eventfd internal counter. - * The value cannot be negative.
- * - * This function is supposed to be called by the kernel in paths that do not - * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX - * value, and we signal this as overflow condition by returning a EPOLLERR - * to poll(2). - * - * Returns the amount by which the counter was incremented. This will be less - * than @n if the counter has overflowed. - */ -__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) +__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask) { unsigned long flags; @@ -80,12 +66,31 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) n = ULLONG_MAX - ctx->count; ctx->count += n; if (waitqueue_active(&ctx->wqh)) - wake_up_locked_poll(&ctx->wqh, EPOLLIN); + wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask); this_cpu_dec(eventfd_wake_count); spin_unlock_irqrestore(&ctx->wqh.lock, flags); return n; } + +/** + * eventfd_signal - Adds @n to the eventfd counter. + * @ctx: [in] Pointer to the eventfd context. + * @n: [in] Value of the counter to be added to the eventfd internal counter. + * The value cannot be negative. + * + * This function is supposed to be called by the kernel in paths that do not + * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX + * value, and we signal this as overflow condition by returning a EPOLLERR + * to poll(2). + * + * Returns the amount by which the counter was incremented. This will be less + * than @n if the counter has overflowed. 
+ */ +__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) +{ + return eventfd_signal_mask(ctx, n, 0); +} EXPORT_SYMBOL_GPL(eventfd_signal); static void eventfd_free_ctx(struct eventfd_ctx *ctx) diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index dc4fd8a6644d..ce1cf42740bf 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -39,6 +39,7 @@ struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); +__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); @@ -66,6 +67,12 @@ static inline int eventfd_signal(struct eventfd_ctx *ctx, int n) return -ENOSYS; } +static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, + unsigned mask) +{ + return -ENOSYS; +} + static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) { From 5756328b3f2992bbacd86bae48c6ace0892648b7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Dec 2022 07:04:49 -0700 Subject: [PATCH 1435/1477] UPSTREAM: io_uring: pass in EPOLL_URING_WAKE for eventfd signaling and wakeups [ Upstream commit 4464853277d0ccdb9914608dd1332f0fa2f9846f ] Pass in EPOLL_URING_WAKE when signaling eventfd or doing poll related wakeups, so that we can check for a circular event dependency between eventfd and epoll. If this flag is set when our wakeup handlers are called, then we know we have a dependency that needs to terminate multishot requests. eventfd and epoll are the only such possible dependencies.
Bug: 268174392 Cc: stable@vger.kernel.org # 6.0 Change-Id: I6e45fa1484657bd5caad007783785c2ee97a9929 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 189556b05e1770263c43fa5b4c689e7cd3fa5b4e) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 473dbd1830a3..945faf036ad0 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1626,13 +1626,15 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) * wake as many waiters as we need to. */ if (wq_has_sleeper(&ctx->cq_wait)) - wake_up_all(&ctx->cq_wait); + __wake_up(&ctx->cq_wait, TASK_NORMAL, 0, + poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait)) wake_up(&ctx->sq_data->wait); if (io_should_trigger_evfd(ctx)) - eventfd_signal(ctx->cq_ev_fd, 1); + eventfd_signal_mask(ctx->cq_ev_fd, 1, EPOLL_URING_WAKE); if (waitqueue_active(&ctx->poll_wait)) - wake_up_interruptible(&ctx->poll_wait); + __wake_up(&ctx->poll_wait, TASK_INTERRUPTIBLE, 0, + poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); } static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) @@ -1642,12 +1644,14 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) if (ctx->flags & IORING_SETUP_SQPOLL) { if (waitqueue_active(&ctx->cq_wait)) - wake_up_all(&ctx->cq_wait); + __wake_up(&ctx->cq_wait, TASK_NORMAL, 0, + poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); } if (io_should_trigger_evfd(ctx)) - eventfd_signal(ctx->cq_ev_fd, 1); + eventfd_signal_mask(ctx->cq_ev_fd, 1, EPOLL_URING_WAKE); if (waitqueue_active(&ctx->poll_wait)) - wake_up_interruptible(&ctx->poll_wait); + __wake_up(&ctx->poll_wait, TASK_INTERRUPTIBLE, 0, + poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); } /* Returns true if there are no backlogged entries after the flush */ @@ -5477,8 +5481,17 @@ static int io_poll_wake(struct wait_queue_entry 
*wait, unsigned mode, int sync, if (mask && !(mask & poll->events)) return 0; - if (io_poll_get_ownership(req)) + if (io_poll_get_ownership(req)) { + /* + * If we trigger a multishot poll off our own wakeup path, + * disable multishot as there is a circular dependency between + * CQ posting and triggering the event. + */ + if (mask & EPOLL_URING_WAKE) + poll->events |= EPOLLONESHOT; + __io_poll_execute(req, mask); + } return 1; } From d369ac0b2a73bb65b7e43bd0268e87e9498b2bcd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 12:14:31 +0000 Subject: [PATCH 1436/1477] ANDROID: add flags variable back to struct proto_ops In commit a3025359ffa7 ("net: remove cmsg restriction from io_uring based send/recvmsg calls") the flags variable was removed from struct proto_ops as it is no longer needed. But the ABI signatures break, so put it back to preserve this, there's no functional change here. Bug: 161946584 Bug: 268174392 Fixes: a3025359ffa7 ("net: remove cmsg restriction from io_uring based send/recvmsg calls") Change-Id: Ic6a868f038701a61c993e18b44cdd8ec8b0a4d58 Signed-off-by: Greg Kroah-Hartman --- include/linux/net.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/net.h b/include/linux/net.h index 3414d63a52f4..e201a7fbf3bc 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -137,6 +137,8 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, struct proto_ops { int family; + unsigned int flags; // ANDROID - removed in 5.10.162, but remains to + // preserve ABI. It is not used anywhere. struct module *owner; int (*release) (struct socket *sock); int (*bind) (struct socket *sock, From fef924db7278b94c60de86b362d3b4743867b581 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 15:53:12 +0000 Subject: [PATCH 1437/1477] ANDROID: fix up struct task_struct ABI change in 5.10.162 In commit 788d0824269b ("io_uring: import 5.15-stable io_uring"), a new field was added to struct task_struct. 
Move it to the proper location and macro in order to preserve the kernel ABI. Bug: 161946584 Bug: 268174392 Fixes: 788d0824269b ("io_uring: import 5.15-stable io_uring") Signed-off-by: Greg Kroah-Hartman Change-Id: Ib2f65b7c1a973794b7ab525a9304f666ffebc9ee --- include/linux/sched.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b8b8f18bbbbb..d3cc279a4639 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -901,9 +901,6 @@ struct task_struct { /* CLONE_CHILD_CLEARTID: */ int __user *clear_child_tid; - /* PF_IO_WORKER */ - void *pf_io_worker; - u64 utime; u64 stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME @@ -1379,7 +1376,9 @@ struct task_struct { ANDROID_VENDOR_DATA_ARRAY(1, 64); ANDROID_OEM_DATA_ARRAY(1, 32); - ANDROID_KABI_RESERVE(1); + /* PF_IO_WORKER */ + ANDROID_KABI_USE(1, void *pf_io_worker); + ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); From d0f788b8fab24197232a6753c3f556ee0cc9d039 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 14:46:15 +0000 Subject: [PATCH 1438/1477] ANDROID: struct io_uring ABI preservation hack for 5.10.162 changes In the 5.10.162 release, the io_uring code was synced with the version that is in the 5.15.y kernel tree in order to resolve a huge number of potential, and known, problems with the codebase. This makes for a more secure and easier-to-update-and-maintain 5.10.y kernel tree, so this is a great thing, however this caused some issues when it comes to the Android KABI preservation and checking tools. A number of the io_uring structures get used in other core kernel structures, only as "opaque" pointers, so there is not any real ABI breakage. But, due to the visibility of the structures going away, the CRC values of many scheduler variables and functions were changed. 
In order to preserve the CRC values, to prevent all device kernels to be forced to rebuild for no reason whatsoever from a functional point of view, we need to keep around the "old" io_uring structures for the CRC calculation only. This is done by the following definitions of struct io_identity and struct io_uring_task which will only be visible when the CRC calculation build happens, not in any functional kernel build. Yes, this all is a horrible hack, and these really are not the true structures that any code uses, but so life is in the world of stable apis. Bug: 161946584 Bug: 268174392 Fixes: 788d0824269b ("io_uring: import 5.15-stable io_uring") Signed-off-by: Greg Kroah-Hartman Change-Id: I2294f220ae78fe9aa32ee25b81829ae765e9deb2 --- include/linux/io_uring.h | 62 ++++++++++++++++++++++++++++++++++++++++ io_uring/io-wq.h | 11 +++++++ io_uring/io_uring.c | 10 +++++++ 3 files changed, 83 insertions(+) diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 649a4d7c241b..5ae871af45de 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -5,6 +5,68 @@ #include #include +#ifdef __GENKSYMS__ +/* + * ANDROID ABI HACK + * + * In the 5.10.162 release, the io_uring code was synced with the version + * that is in the 5.15.y kernel tree in order to resolve a huge number of + * potential, and known, problems with the codebase. This makes for a more + * secure and easier-to-update-and-maintain 5.10.y kernel tree, so this is + * a great thing, however this caused some issues when it comes to the + * Android KABI preservation and checking tools. + * + * A number of the io_uring structures get used in other core kernel + * structures, only as "opaque" pointers, so there is not any real ABI + * breakage. But, due to the visibility of the structures going away, the + * CRC values of many scheduler variables and functions were changed. 
+ * + * In order to preserve the CRC values, to prevent all device kernels to be + * forced to rebuild for no reason whatsoever from a functional point of + * view, we need to keep around the "old" io_uring structures for the CRC + * calculation only. This is done by the following definitions of struct + * io_identity and struct io_uring_task which will only be visible when the + * CRC calculation build happens, not in any functional kernel build. + * + * Yes, this all is a horrible hack, and these really are not the true + * structures that any code uses, but so life is in the world of stable + * apis... + * The real structures are in io_uring/io_uring.c, see the ones there if + * you need to touch or do anything with it. + * + * NEVER touch these structure definitions, they are fake and not valid code. + */ +struct io_identity { + struct files_struct *files; + struct mm_struct *mm; +#ifdef CONFIG_BLK_CGROUP + struct cgroup_subsys_state *blkcg_css; +#endif + const struct cred *creds; + struct nsproxy *nsproxy; + struct fs_struct *fs; + unsigned long fsize; +#ifdef CONFIG_AUDIT + kuid_t loginuid; + unsigned int sessionid; +#endif + refcount_t count; +}; + +struct io_uring_task { + /* submission side */ + struct xarray xa; + struct wait_queue_head wait; + struct file *last; + struct percpu_counter inflight; + struct io_identity __identity; + struct io_identity *identity; + atomic_t in_idle; + bool sqpoll; +}; +#endif /* ANDROID ABI HACK */ + + #if defined(CONFIG_IO_URING) struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h index bf5c4c533760..300a695d191d 100644 --- a/io_uring/io-wq.h +++ b/io_uring/io-wq.h @@ -1,6 +1,17 @@ #ifndef INTERNAL_IO_WQ_H #define INTERNAL_IO_WQ_H +#ifdef __GENKSYMS__ +/* + * ANDROID ABI HACK + * + * See the big comment in the linux/io_uring.h file for details. 
This + * include is not needed for any real functionality, but must be here to + * preserve the CRC of a number of variables and functions. + */ +#include +#endif + #include struct io_wq; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 945faf036ad0..35a9bcd5648b 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -461,6 +461,15 @@ struct io_ring_ctx { }; }; +#ifndef __GENKSYMS__ +/* + * ANDROID ABI HACK + * + * See the big comment in the linux/io_uring.h file for details. This + * structure definition should NOT be used if __GENKSYMS__ is enabled, + * as a "fake" structure definition has already been read in the + * linux/io_uring.h file in order to preserve the Android kernel ABI. + */ struct io_uring_task { /* submission side */ int cached_refs; @@ -477,6 +486,7 @@ struct io_uring_task { struct callback_head task_work; bool task_running; }; +#endif /* * First field must be the file pointer in all the From 899476c3afacfdd3183310bdd56f331ce9f5fc38 Mon Sep 17 00:00:00 2001 From: qixiaoyu1 Date: Fri, 13 Jan 2023 10:19:29 +0800 Subject: [PATCH 1439/1477] FROMGIT: f2fs: fix wrong calculation of block age Currently we wrongly calculate the new block age to old * LAST_AGE_WEIGHT / 100. Fix it to new * (100 - LAST_AGE_WEIGHT) / 100 + old * LAST_AGE_WEIGHT / 100. 
Bug: 267580491 (cherry picked from commit b03a41a495df35f8e8d25220878bd6b8472d9396 https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev) Signed-off-by: qixiaoyu1 Signed-off-by: xiongping1 Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Change-Id: If06f04c63f9ed0de4e1d734936d9ea9a6c613d64 --- fs/f2fs/extent_cache.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 8306fcdda510..42af3319dbe1 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -873,11 +873,18 @@ unlock_out: static unsigned long long __calculate_block_age(unsigned long long new, unsigned long long old) { - unsigned long long diff; + unsigned int rem_old, rem_new; + unsigned long long res; - diff = (new >= old) ? new - (new - old) : new + (old - new); + res = div_u64_rem(new, 100, &rem_new) * (100 - LAST_AGE_WEIGHT) + + div_u64_rem(old, 100, &rem_old) * LAST_AGE_WEIGHT; - return div_u64(diff * LAST_AGE_WEIGHT, 100); + if (rem_new) + res += rem_new * (100 - LAST_AGE_WEIGHT) / 100; + if (rem_old) + res += rem_old * LAST_AGE_WEIGHT / 100; + + return res; } /* This returns a new age and allocated blocks in ei */ From 3fcc69ca4d73af35c58ab578c70ad0bd65a5575a Mon Sep 17 00:00:00 2001 From: qixiaoyu1 Date: Tue, 20 Dec 2022 16:16:29 +0800 Subject: [PATCH 1440/1477] FROMGIT: f2fs: add sysfs nodes to set last_age_weight Bug: 267580491 (cherry picked from commit d23be468eada21c828058e0e8d60409eaec373ab https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev) Signed-off-by: qixiaoyu1 Signed-off-by: xiongping1 Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Change-Id: I88b795ec90f4589676daed4919db31b26574c84b --- Documentation/ABI/testing/sysfs-fs-f2fs | 5 +++++ fs/f2fs/extent_cache.c | 15 +++++++++------ fs/f2fs/f2fs.h | 1 + fs/f2fs/sysfs.c | 11 +++++++++++ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs index 9c7ddb4d331f..75f4bbb06c4d 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -528,3 +528,8 @@ Contact: "Ping Xiong" Description: When DATA SEPARATION is on, it controls the age threshold to indicate the data blocks as warm. By default it was initialized as 2621440 blocks (equals to 10GB). + +What: /sys/fs/f2fs//last_age_weight +Date: January 2023 +Contact: "Ping Xiong" +Description: When DATA SEPARATION is on, it controls the weight of last data block age. diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 42af3319dbe1..33eb6dea04f0 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -870,19 +870,21 @@ unlock_out: } #endif -static unsigned long long __calculate_block_age(unsigned long long new, +static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi, + unsigned long long new, unsigned long long old) { unsigned int rem_old, rem_new; unsigned long long res; + unsigned int weight = sbi->last_age_weight; - res = div_u64_rem(new, 100, &rem_new) * (100 - LAST_AGE_WEIGHT) - + div_u64_rem(old, 100, &rem_old) * LAST_AGE_WEIGHT; + res = div_u64_rem(new, 100, &rem_new) * (100 - weight) + + div_u64_rem(old, 100, &rem_old) * weight; if (rem_new) - res += rem_new * (100 - LAST_AGE_WEIGHT) / 100; + res += rem_new * (100 - weight) / 100; if (rem_old) - res += rem_old * LAST_AGE_WEIGHT / 100; + res += rem_old * weight / 100; return res; } @@ -916,7 +918,7 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei, cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks; if (tei.age) - ei->age = __calculate_block_age(cur_age, tei.age); + ei->age = __calculate_block_age(sbi, cur_age, tei.age); else ei->age = cur_age; ei->last_blocks = cur_blocks; @@ -1233,6 +1235,7 @@ void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) atomic64_set(&sbi->allocated_data_blocks, 0); sbi->hot_data_age_threshold = 
DEF_HOT_DATA_AGE_THRESHOLD; sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD; + sbi->last_age_weight = LAST_AGE_WEIGHT; } int __init f2fs_create_extent_cache(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8a7d1802a473..2e13419f93e8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1627,6 +1627,7 @@ struct f2fs_sb_info { /* The threshold used for hot and warm data seperation*/ unsigned int hot_data_age_threshold; unsigned int warm_data_age_threshold; + unsigned int last_age_weight; /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index c9217d7a941f..a095919c55ad 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -567,6 +567,15 @@ out: return count; } + if (!strcmp(a->attr.name, "last_age_weight")) { + if (t > 100) + return -EINVAL; + if (t == *ui) + return count; + *ui = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -799,6 +808,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs); /* For block age extent cache */ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, last_age_weight, last_age_weight); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -877,6 +887,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_reclaimed_segments), ATTR_LIST(hot_data_age_threshold), ATTR_LIST(warm_data_age_threshold), + ATTR_LIST(last_age_weight), NULL, }; ATTRIBUTE_GROUPS(f2fs); From d6b2899ce661ed6d21dbf5ea83899aefd3ae359c Mon Sep 17 00:00:00 2001 From: Woogeun Lee Date: Wed, 14 Dec 2022 09:54:17 +0900 Subject: [PATCH 1441/1477] ANDROID: ABI: update allowed list for galaxy 1 Added function: [A] 'function void _trace_android_vh_record_pcpu_rwsem_starttime(task_struct*, unsigned long int)' Bug: 
262423323 Change-Id: I4ebef8d03a3c030da6eac2f4d857ce889005d5ec Signed-off-by: Woogeun Lee --- android/abi_gki_aarch64.xml | 41 +++++++++++++++++++++++++++++++++- android/abi_gki_aarch64_galaxy | 1 + 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 5f631576d09c..1bc86e8755d1 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -44536,7 +44536,23 @@ - + + + + + + + + + + + + + + + + + @@ -48866,6 +48882,7 @@ + @@ -53809,6 +53826,13 @@ + + + + + + + @@ -66469,6 +66493,13 @@ + + + + + + + @@ -91217,6 +91248,7 @@ + @@ -102983,6 +103015,12 @@ + + + + + + @@ -111228,6 +111266,7 @@ + diff --git a/android/abi_gki_aarch64_galaxy b/android/abi_gki_aarch64_galaxy index b9540bf8b079..7ba5f74f973a 100644 --- a/android/abi_gki_aarch64_galaxy +++ b/android/abi_gki_aarch64_galaxy @@ -574,6 +574,7 @@ _snd_ctl_add_follower _snd_pcm_stream_lock_irqsave _totalram_pages + _trace_android_vh_record_pcpu_rwsem_starttime access_process_vm ack_all_badblocks activate_task From 1eb5992d60010c922f94c6d448f91b4548ade0a4 Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Mon, 9 Jan 2023 14:33:03 +0530 Subject: [PATCH 1442/1477] ANDROID: cpuidle-psci: Fix suspicious RCU usage This change fixes suspicious RCU usage warnings from vendor hook. ============================= WARNING: suspicious RCU usage 5.15.41-debug-gc1163f69ba3b-dirty #1 Not tainted ----------------------------- include/trace/events/lock.h:37 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 RCU used illegally from extended quiescent state! no locks held by swapper/0/0. stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.15.41-debug-gc1163f69ba3b-dirty #1 Call trace: dump_backtrace+0x0/0x1d8 dump_stack+0x1c/0x4c .. .. 
_printk+0x58/0x84 lockdep_rcu_suspicious+0x44/0x15c trace_android_vh_printk_caller_id+0xc4/0x13c vprintk_store+0x54/0x59c vprintk_emit+0x8c/0x130 vprintk_default+0x48/0x74 vprintk+0xf8/0x13c _printk+0x58/0x84 lockdep_rcu_suspicious+0x44/0x15c trace_android_vh_cpuidle_psci_enter+0xc4/0x144 __psci_enter_domain_idle_state+0x64/0x118 psci_enter_domain_idle_state+0x1c/0x2c cpuidle_enter_state+0x14c/0x2fc cpuidle_enter+0x3c/0x58 Bug: 267847290 Fixes: 3567f516024d ("ANDROID: cpuidle-psci: Add vendor hook for cpuidle psci enter and exit") Change-Id: I910a6a0595c3a79b75e581297eb56d512ce5885c Signed-off-by: Maulik Shah --- drivers/cpuidle/cpuidle-psci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index efc063ffc683..95ec4a688aa7 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -68,10 +68,12 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev, if (ret) return -1; - trace_android_vh_cpuidle_psci_enter(dev, s2idle); /* Do runtime PM to manage a hierarchical CPU toplogy. */ rcu_irq_enter_irqson(); + + trace_android_vh_cpuidle_psci_enter(dev, s2idle); + if (s2idle) dev_pm_genpd_suspend(pd_dev); else @@ -89,10 +91,11 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev, dev_pm_genpd_resume(pd_dev); else pm_runtime_get_sync(pd_dev); - rcu_irq_exit_irqson(); trace_android_vh_cpuidle_psci_exit(dev, s2idle); + rcu_irq_exit_irqson(); + cpu_pm_exit(); /* Clear the domain state to start fresh when back from idle. */ From 865f370bf9f971cada5a1513d0a89fe27bdc860e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Aug 2022 13:54:25 +0100 Subject: [PATCH 1443/1477] ANDROID: mm/vmalloc: Add override for lazy vunmap Add an interface to disable lazy vunmap by forcing the threshold to zero. 
This might be interesting for debugging/testing in general, but primarily helps a horrible situation which needs to guarantee that vmalloc aliases are up-to-date from atomic context, wherein the only practical solution is to never let them get stale in the first place. Bug: 223346425 (cherry picked from commit 2a34c1503b85f49dd472dfd932dfcd16cab8ee8a https://git.gitlab.arm.com/linux-arm/linux-rm.git arm64/2454944-dev) Change-Id: I12fbbe3903f76a028ceea91ed078f0de2abe3815 Signed-off-by: Robin Murphy [BM: Convert to a flag that can be explicitly modified at runtime instead of relying on arch specific bits] Signed-off-by: Beata Michalska --- include/linux/vmalloc.h | 3 +++ mm/vmalloc.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0549ca17ba6f..167a9533d951 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -245,4 +245,7 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) int register_vmap_purge_notifier(struct notifier_block *nb); int unregister_vmap_purge_notifier(struct notifier_block *nb); +/* Allow disabling lazy TLB flushing */ +extern bool lazy_vunmap_enable; + #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 817a472ee30f..3b56c30a8e93 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1277,6 +1277,7 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier); +bool lazy_vunmap_enable __read_mostly = true; /* * lazy_max_pages is the maximum amount of virtual address space we gather up * before attempting to purge with a TLB flush. 
@@ -1297,6 +1298,9 @@ static unsigned long lazy_max_pages(void) { unsigned int log; + if (!lazy_vunmap_enable) + return 0; + log = fls(num_online_cpus()); return log * (32UL * 1024 * 1024 / PAGE_SIZE); From 3c75a6fb7fd1cd149d7af1a2e200b064a2d12782 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 25 Apr 2022 17:53:55 +0100 Subject: [PATCH 1444/1477] ANDROID: arm64: Work around Cortex-A510 erratum 2454944 Cortex-A510 erratum 2454944 may cause clean cache lines to be erroneously written back to memory, breaking the assumptions we rely on for non-coherent DMA. Try to mitigate this by implementing special DMA ops that do their best to avoid cacheable aliases via a combination of bounce-buffering and manipulating the linear map directly, to minimise the chance of DMA-mapped pages being speculated back into caches. The other main concern is initial entry, where cache lines covering the kernel image might potentially become affected between being cleaned by the bootloader and the kernel being called, which might require additional cache maintenance from the bootloader to be safe in that regard too. Cortex-A510 supports S2FWB, so KVM should be unaffected. For the workaround to be applied, it needs to be explicitly requested through dedicated arm64_noalias_setup_dma_ops callback. Bug: 223346425 (cherry picked from commit 683efc5fc6eeb653caf85c33a2fb92a33c8faa75 https://git.gitlab.arm.com/linux-arm/linux-rm.git arm64/2454944-dev) Change-Id: If76b97dc39c278edb80f9b750129975ab2ac563e Signed-off-by: Robin Murphy [BM: Stripping-down the original solution by removing support for cpu capabilities and amending relevant bits, with the final version being reduced to dedicated DMA ops with dependencies on rodata_full being enabled (CONFIG_RODATA_FULL_DEFAULT_ENABLED), swiotlb late init and disabling lazy tlb flushing. Also, as a consequence, reducing debugging support.]
Signed-off-by: Beata Michalska --- arch/arm64/Kconfig | 23 + arch/arm64/include/asm/dma-mapping-noalias.h | 15 + arch/arm64/mm/Makefile | 2 + arch/arm64/mm/dma-mapping-noalias.c | 576 +++++++++++++++++++ 4 files changed, 616 insertions(+) create mode 100644 arch/arm64/include/asm/dma-mapping-noalias.h create mode 100644 arch/arm64/mm/dma-mapping-noalias.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4b8b2e0ecfab..88ac9f0b1462 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -729,6 +729,29 @@ config ARM64_ERRATUM_2067961 If unsure, say Y. +config ARM64_ERRATUM_2454944 + bool "Cortex-A510: 2454944: Unmodified cache line might be written back to memory" + select ARCH_HAS_TEARDOWN_DMA_OPS + select RODATA_FULL_DEFAULT_ENABLED + help + This option adds the workaround for ARM Cortex-A510 erratum 2454944. + + Affected Cortex-A510 core might write unmodified cache lines back to + memory, which breaks the assumptions upon which software coherency + management for non-coherent DMA relies. If a cache line is + speculatively fetched while a non-coherent device is writing directly + to DRAM, and subsequently written back by natural eviction, data + written by the device in the intervening period can be lost. + + The workaround is to enforce as far as reasonably possible that all + non-coherent DMA transfers are bounced and/or remapped to minimise + the chance that any Cacheable alias exists through which speculative + cache fills could occur. To further improve effectiveness of + the workaround, lazy TLB flushing should be disabled. + + This is quite involved and has unavoidable performance impact on + affected systems. 
+ config ARM64_ERRATUM_2457168 bool "Cortex-A510: 2457168: workaround for AMEVCNTR01 incrementing incorrectly" depends on ARM64_AMU_EXTN diff --git a/arch/arm64/include/asm/dma-mapping-noalias.h b/arch/arm64/include/asm/dma-mapping-noalias.h new file mode 100644 index 000000000000..f71300626167 --- /dev/null +++ b/arch/arm64/include/asm/dma-mapping-noalias.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022-2023 ARM Ltd. + */ +#ifndef __ASM_DMA_MAPPING_NOALIAS_H +#define __ASM_DMA_MAPPING_NOALIAS_H + +#ifdef CONFIG_ARM64_ERRATUM_2454944 +void arm64_noalias_setup_dma_ops(struct device *dev); +#else +static inline void arm64_noalias_setup_dma_ops(struct device *dev) +{ +} +#endif +#endif /* __ASM_DMA_MAPPING_NOALIAS_H */ diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 5ead3c3de3b6..1d61971bcde0 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -13,3 +13,5 @@ KASAN_SANITIZE_physaddr.o += n obj-$(CONFIG_KASAN) += kasan_init.o KASAN_SANITIZE_kasan_init.o := n + +obj-$(CONFIG_ARM64_ERRATUM_2454944) += dma-mapping-noalias.o diff --git a/arch/arm64/mm/dma-mapping-noalias.c b/arch/arm64/mm/dma-mapping-noalias.c new file mode 100644 index 000000000000..5a9bba17af22 --- /dev/null +++ b/arch/arm64/mm/dma-mapping-noalias.c @@ -0,0 +1,576 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support for uncached DMA mappings. + * Part of Cortex-A510 erratum 2454944 workaround. + * + * Copyright (C) 2022-2023 ARM Ltd. + * Author: Robin Murphy + * Activating swiotlb + disabling lazy vunmap: Beata Michalska + */ +#include +#include +#include +#include +#include +#include +#include + +/* + * Bits [58:55] of the translation table descriptor are being reserved + * by the architecture for software use purposes. With the assumption that + * those should not be used on linear map addresses (which is not without + * any guarantee though), those bits are being leveraged to trace potential + * cacheable aliases. 
This is still far from being perfect, to say at least: + * ... categorically the worst, but oh well, needs must... + */ +#define REFCOUNT_INC BIT(55) +#define PTE_REFCOUNT(pte) (((pte) >> 55) & 0xf) + +static int pte_set_nc(pte_t *ptep, unsigned long addr, void *data) +{ + pteval_t old_pte, new_pte, pte; + unsigned int refcount; + + pte = pte_val(READ_ONCE(*ptep)); + do { + /* Avoid racing against the transient invalid state */ + old_pte = pte | PTE_VALID; + new_pte = old_pte + REFCOUNT_INC; + refcount = PTE_REFCOUNT(pte); + if (WARN_ON(refcount == 15)) + return -EINVAL; + if (refcount == 0) { + new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID); + new_pte |= PTE_ATTRINDX(MT_NORMAL_NC); + } + pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte); + } while (pte != old_pte); + + *(unsigned int *)data = refcount; + if (refcount) + return 0; + + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID)); + return 0; +} + +static int pte_clear_nc(pte_t *ptep, unsigned long addr, void *data) +{ + pteval_t old_pte, new_pte, pte; + unsigned int refcount; + + pte = pte_val(READ_ONCE(*ptep)); + do { + old_pte = pte | PTE_VALID; + new_pte = old_pte - REFCOUNT_INC; + refcount = PTE_REFCOUNT(pte); + if (WARN_ON(refcount == 0)) + return -EINVAL; + if (refcount == 1) { + new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID); + new_pte |= PTE_ATTRINDX(MT_NORMAL_TAGGED); + } + pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte); + } while (pte != old_pte); + + if (refcount > 1) + return 0; + + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID)); + return 0; +} + +static int set_nc(void *addr, size_t size) +{ + unsigned int count; + int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr, + size, pte_set_nc, &count); + + WARN_RATELIMIT(count == 0 && page_mapped(virt_to_page(addr)), + "changing linear mapping but cacheable aliases may still exist\n"); + dsb(ishst); + isb(); + 
__flush_dcache_area(addr, size); + return ret; +} + +static int clear_nc(void *addr, size_t size) +{ + int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr, + size, pte_clear_nc, NULL); + dsb(ishst); + isb(); + __inval_dcache_area(addr, size); + return ret; +} + +static phys_addr_t __arm64_noalias_map(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, + unsigned long attrs, bool bounce) +{ + bounce = bounce || (phys | size) & ~PAGE_MASK; + if (bounce) { + phys = swiotlb_tbl_map_single(dev, phys, size, PAGE_ALIGN(size), + dir, attrs); + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + } + if (set_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size))) + goto out_unmap; + + return phys; +out_unmap: + if (bounce) + swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); + return DMA_MAPPING_ERROR; + +} + +static void __arm64_noalias_unmap(struct device *dev, phys_addr_t phys, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + clear_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size)); + if (is_swiotlb_buffer(phys)) + swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir, attrs); +} + +static void __arm64_noalias_sync_for_device(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir) +{ + if (is_swiotlb_buffer(phys)) + swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE); + else + arch_sync_dma_for_device(phys, size, dir); +} + +static void __arm64_noalias_sync_for_cpu(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir) +{ + if (is_swiotlb_buffer(phys)) + swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU); + else + arch_sync_dma_for_cpu(phys, size, dir); +} + +static void *arm64_noalias_alloc(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs) +{ + struct page *page; + void *ret; + + if (attrs & DMA_ATTR_NO_WARN) + gfp |= 
__GFP_NOWARN; + + size = PAGE_ALIGN(size); + page = dma_direct_alloc_pages(dev, size, dma_addr, 0, gfp & ~__GFP_ZERO); + if (!page) + return NULL; + + ret = page_address(page); + if (set_nc(ret, size)) { + dma_direct_free_pages(dev, size, page, *dma_addr, 0); + return NULL; + } + return ret; +} + +static void arm64_noalias_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) +{ + size = PAGE_ALIGN(size); + clear_nc(cpu_addr, size); + dma_direct_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0); +} + +static dma_addr_t arm64_noalias_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + bool bounce = !dma_capable(dev, phys_to_dma(dev, phys), size, true); + + if (!bounce && dir == DMA_TO_DEVICE) { + arch_sync_dma_for_device(phys, size, dir); + return phys_to_dma(dev, phys); + } + + bounce = bounce || page_mapped(page); + phys = __arm64_noalias_map(dev, phys, size, dir, attrs, bounce); + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + + return phys_to_dma(dev, phys); +} + +static void arm64_noalias_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + if (dir == DMA_TO_DEVICE) + return; + __arm64_noalias_unmap(dev, dma_to_phys(dev, dma_addr), size, dir, attrs); +} + +static void arm64_noalias_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + if (dir == DMA_TO_DEVICE) + return; + for_each_sg(sgl, sg, nents, i) + __arm64_noalias_unmap(dev, dma_to_phys(dev, sg->dma_address), + sg->length, dir, attrs); +} + +static int arm64_noalias_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + 
for_each_sg(sgl, sg, nents, i) { + sg->dma_address = arm64_noalias_map_page(dev, sg_page(sg), sg->offset, + sg->length, dir, attrs); + if (sg->dma_address == DMA_MAPPING_ERROR) + goto out_unmap; + sg->dma_length = sg->length; + } + + return nents; + +out_unmap: + arm64_noalias_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + return 0; +} + +static void arm64_noalias_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + __arm64_noalias_sync_for_device(dev, dma_to_phys(dev, addr), size, dir); +} + +static void arm64_noalias_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + __arm64_noalias_sync_for_cpu(dev, dma_to_phys(dev, addr), size, dir); +} + +static void arm64_noalias_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arm64_noalias_sync_single_for_device(dev, sg->dma_address, sg->length, dir); +} + +static void arm64_noalias_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arm64_noalias_sync_single_for_cpu(dev, sg->dma_address, sg->length, dir); +} + +static const struct dma_map_ops arm64_noalias_ops = { + .alloc = arm64_noalias_alloc, + .free = arm64_noalias_free, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, + .map_page = arm64_noalias_map_page, + .unmap_page = arm64_noalias_unmap_page, + .map_sg = arm64_noalias_map_sg, + .unmap_sg = arm64_noalias_unmap_sg, + .sync_single_for_cpu = arm64_noalias_sync_single_for_cpu, + .sync_single_for_device = arm64_noalias_sync_single_for_device, + .sync_sg_for_cpu = arm64_noalias_sync_sg_for_cpu, + .sync_sg_for_device = 
arm64_noalias_sync_sg_for_device, + .dma_supported = dma_direct_supported, + .get_required_mask = dma_direct_get_required_mask, + .max_mapping_size = swiotlb_max_mapping_size, +}; + +#ifdef CONFIG_IOMMU_DMA +static const struct dma_map_ops *iommu_dma_ops; + +static void *arm64_iommu_alloc(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs) +{ + struct page **pages; + void *ret; + int i; + + size = PAGE_ALIGN(size); + if (!gfpflags_allow_blocking(gfp) || (attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { + ret = dma_common_alloc_pages(dev, size, dma_addr, 0, gfp); + return ret ? page_address(ret) : NULL; + } + + ret = iommu_dma_ops->alloc(dev, size, dma_addr, gfp, attrs); + if (ret) { + pages = dma_common_find_pages(ret); + for (i = 0; i < size / PAGE_SIZE; i++) + if (set_nc(page_address(pages[i]), PAGE_SIZE)) + goto err; + } + return ret; + +err: + while (i--) + clear_nc(page_address(pages[i]), PAGE_SIZE); + iommu_dma_ops->free(dev, size, ret, *dma_addr, attrs); + return NULL; +} + +static void arm64_iommu_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) +{ + struct page **pages = dma_common_find_pages(cpu_addr); + int i; + + size = PAGE_ALIGN(size); + if (!pages) + return dma_common_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0); + + for (i = 0; i < size / PAGE_SIZE; i++) + clear_nc(page_address(pages[i]), PAGE_SIZE); + iommu_dma_ops->free(dev, size, cpu_addr, dma_addr, attrs); +} + +static dma_addr_t arm64_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t ret; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->map_page(dev, page, offset, size, dir, attrs); + + phys = __arm64_noalias_map(dev, phys, size, dir, attrs, page_mapped(page)); + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + + attrs |= 
DMA_ATTR_SKIP_CPU_SYNC; + ret = iommu_dma_ops->map_page(dev, phys_to_page(phys), offset_in_page(phys), + size, dir, attrs); + if (ret == DMA_MAPPING_ERROR) + __arm64_noalias_unmap(dev, phys, size, dir, attrs); + return ret; +} + +static void arm64_iommu_unmap_page(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs); + + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); + iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + __arm64_noalias_unmap(dev, phys, size, dir, attrs); +} + +static int arm64_iommu_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + int i, ret; + struct scatterlist *sg; + phys_addr_t *orig_phys; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs); + + orig_phys = kmalloc_array(nents, sizeof(*orig_phys), GFP_ATOMIC); + if (!orig_phys) + return 0; + + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = sg_phys(sg); + /* + * Note we do not have the page_mapped() check here, since + * bouncing plays complete havoc with dma-buf imports. Those + * may well be mapped in userspace, but we hope and pray that + * it's via dma_mmap_attrs() so any such mappings are safely + * non-cacheable. DO NOT allow a block device or other similar + * scatterlist user to get here (disable IOMMUs if necessary), + * since we can't mitigate for both conflicting use-cases. 
+ */ + phys = __arm64_noalias_map(dev, phys, sg->length, dir, attrs, false); + if (phys == DMA_MAPPING_ERROR) + goto out_unmap; + + orig_phys[i] = sg_phys(sg); + sg_assign_page(sg, phys_to_page(phys)); + sg->offset = offset_in_page(phys); + } + ret = iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + if (ret <= 0) + goto out_unmap; + + for_each_sg(sgl, sg, nents, i) { + sg_assign_page(sg, phys_to_page(orig_phys[i])); + sg->offset = offset_in_page(orig_phys[i]); + } + + kfree(orig_phys); + return ret; + +out_unmap: + for_each_sg(sgl, sg, nents, i) { + __arm64_noalias_unmap(dev, sg_phys(sg), sg->length, dir, attrs); + sg_assign_page(sg, phys_to_page(orig_phys[i])); + sg->offset = offset_in_page(orig_phys[i]); + } + kfree(orig_phys); + return 0; +} + +static void arm64_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain; + struct scatterlist *sg, *tmp; + dma_addr_t iova; + int i; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs); + + domain = iommu_get_dma_domain(dev); + iova = sgl->dma_address; + tmp = sgl; + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = iommu_iova_to_phys(domain, iova); + + __arm64_noalias_unmap(dev, phys, sg->length, dir, attrs); + iova += sg->length; + if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { + tmp = sg_next(tmp); + iova = tmp->dma_address; + } + } + iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); +} + +static void arm64_iommu_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); + + __arm64_noalias_sync_for_device(dev, phys, size, dir); +} + +static void arm64_iommu_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); + + __arm64_noalias_sync_for_cpu(dev, phys, size, dir); +} + +static void arm64_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct scatterlist *sg, *tmp = sgl; + dma_addr_t iova = sgl->dma_address; + int i; + + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = iommu_iova_to_phys(domain, iova); + + __arm64_noalias_sync_for_device(dev, phys, sg->length, dir); + iova += sg->length; + if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { + tmp = sg_next(tmp); + iova = tmp->dma_address; + } + } +} + +static void arm64_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct scatterlist *sg, *tmp = sgl; + dma_addr_t iova = sgl->dma_address; + int i; + + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = iommu_iova_to_phys(domain, iova); + + __arm64_noalias_sync_for_cpu(dev, phys, sg->length, dir); + iova += sg->length; + if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { + tmp = sg_next(tmp); + iova = tmp->dma_address; + } + } +} + +static struct dma_map_ops arm64_iommu_ops = { + .alloc = arm64_iommu_alloc, + .free = arm64_iommu_free, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, + .map_page = arm64_iommu_map_page, + .unmap_page = arm64_iommu_unmap_page, + .map_sg = arm64_iommu_map_sg, + .unmap_sg = arm64_iommu_unmap_sg, + .sync_single_for_cpu = arm64_iommu_sync_single_for_cpu, + .sync_single_for_device = arm64_iommu_sync_single_for_device, + .sync_sg_for_cpu = arm64_iommu_sync_sg_for_cpu, + .sync_sg_for_device = arm64_iommu_sync_sg_for_device, +}; + +#endif /* CONFIG_IOMMU_DMA */ + +static inline void arm64_noalias_prepare(void) +{ + if (!is_swiotlb_active()) + 
swiotlb_late_init_with_default_size(swiotlb_size_or_default()); + if (lazy_vunmap_enable) { + lazy_vunmap_enable = false; + vm_unmap_aliases(); + } +} + +void arm64_noalias_setup_dma_ops(struct device *dev) +{ + if (dev_is_dma_coherent(dev)) + return; + + dev_info(dev, "applying no-alias DMA workaround\n"); + if (!dev->dma_ops) { + dev->dma_ops = &arm64_noalias_ops; + goto done; + } + + if (IS_ENABLED(CONFIG_IOMMU_DMA)) { + dev->dma_ops = &arm64_iommu_ops; + if (iommu_dma_ops) + goto done; + + iommu_dma_ops = dev->dma_ops; + arm64_iommu_ops.mmap = iommu_dma_ops->mmap; + arm64_iommu_ops.get_sgtable = iommu_dma_ops->get_sgtable; + arm64_iommu_ops.map_resource = iommu_dma_ops->map_resource; + arm64_iommu_ops.unmap_resource = iommu_dma_ops->unmap_resource; + arm64_iommu_ops.get_merge_boundary = iommu_dma_ops->get_merge_boundary; + } +done: + arm64_noalias_prepare(); +} +EXPORT_SYMBOL_GPL(arm64_noalias_setup_dma_ops); From 9d2ec2e0b69e81d5925e363f3a4e0b6cfaefb7e7 Mon Sep 17 00:00:00 2001 From: Beata Michalska Date: Tue, 20 Dec 2022 20:29:23 +0000 Subject: [PATCH 1445/1477] ANDROID: dma-ops: Add restricted vendor hook Add a vendor hook to arch_setup_dma_ops to allow vendors to perform any necessary post-actions on setting up DMA ops for a given device, focusing mainly on enabling those to opt-in for the Cortex-A510 erratum 2454944. 
Bug: 263236925 Change-Id: I6fd4d3a30829437fc113ec15ca2e5d060a38e60c Signed-off-by: Beata Michalska --- arch/arm64/mm/dma-mapping.c | 4 ++++ drivers/android/vendor_hooks.c | 2 ++ include/trace/hooks/dma_noalias.h | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 include/trace/hooks/dma_noalias.h diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e75e5e75b192..19ceea8fea1a 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -56,6 +57,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, trace_android_rvh_iommu_setup_dma_ops(dev, dma_base, size); } + /* Allow vendor modules to opt-in for the 2454944 erratum workaround */ + trace_android_rvh_setup_dma_ops(dev); + #ifdef CONFIG_XEN if (xen_initial_domain()) dev->dma_ops = &xen_swiotlb_dma_ops; diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index 3ff54d2aa04d..18ef4406879f 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -210,6 +211,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_die_kernel_fault); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_sea); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_mem_abort); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_sp_pc_abort); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_setup_dma_ops); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_iommu_setup_dma_ops); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_iommu_setup_dma_ops); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_iommu_alloc_iova); diff --git a/include/trace/hooks/dma_noalias.h b/include/trace/hooks/dma_noalias.h new file mode 100644 index 000000000000..2dc7b04d9ab2 --- /dev/null +++ b/include/trace/hooks/dma_noalias.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dma_noalias + +#define 
TRACE_INCLUDE_PATH trace/hooks + +#if !defined(_TRACE_HOOK_DMA_NOALIAS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HOOK_DMA_NOALIAS_H + +#include + +DECLARE_RESTRICTED_HOOK(android_rvh_setup_dma_ops, + TP_PROTO(struct device *dev), + TP_ARGS(dev), 1); + +#endif /*_TRACE_HOOK_DMA_NOALIAS_H */ + +/* This part must be outside protection */ +#include From d40d310e5ea48fda20e144621a2abe0aa61cf39c Mon Sep 17 00:00:00 2001 From: Beata Michalska Date: Thu, 12 Jan 2023 10:21:26 +0000 Subject: [PATCH 1446/1477] ANDROID: GKI: Enable ARM64_ERRATUM_2454944 Enable workaround for Cortex-A510 erratum 2454944. Bug: 223346425 Change-Id: Ieb60640b26cd2093702045670890b6a204277cca Signed-off-by: Beata Michalska --- arch/arm64/configs/gki_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 2ff6b13c9fe6..d6adcfbd974d 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -51,6 +51,7 @@ CONFIG_ARCH_SUNXI=y CONFIG_ARCH_HISI=y CONFIG_ARCH_QCOM=y CONFIG_ARCH_SPRD=y +CONFIG_ARM64_ERRATUM_2454944=y CONFIG_SCHED_MC=y CONFIG_NR_CPUS=32 CONFIG_PARAVIRT=y From 345ec059db55e8a5e4086af2035e014a456df759 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Tue, 10 Jan 2023 08:46:47 -0800 Subject: [PATCH 1447/1477] UPSTREAM: io_uring: Fix unsigned 'res' comparison with zero in io_fixup_rw_res() Smatch warning: io_fixup_rw_res() warn: unsigned 'res' is never less than zero. Change type of 'res' from unsigned to long. 
Fixes: d6b7efc722a2 ("io_uring/rw: fix error'ed retry return values") Signed-off-by: Harshit Mogalapalli Signed-off-by: Greg Kroah-Hartman Bug: 268174392 (cherry picked from commit 07b3672c4090d1e8147b1ae9c2546d8a8d8e13ae) Signed-off-by: Greg Kroah-Hartman Change-Id: I3534398af5e77e92a1ac48170e3ae4dffa42463b --- io_uring/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 35a9bcd5648b..5947c8140229 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2712,7 +2712,7 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res) return false; } -static inline int io_fixup_rw_res(struct io_kiocb *req, unsigned res) +static inline int io_fixup_rw_res(struct io_kiocb *req, long res) { struct io_async_rw *io = req->async_data; From 3b30dcbf0c741f580d13c096f2515e7a6a447ae6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Jan 2023 16:49:46 -0700 Subject: [PATCH 1448/1477] UPSTREAM: io_uring/io-wq: free worker if task_work creation is canceled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit af82425c6a2d2f347c79b63ce74fca6dc6be157f upstream. If we cancel the task_work, the worker will never come into existence. As this is the last reference to it, ensure that we get it freed appropriately.
Cc: stable@vger.kernel.org Reported-by: 진호 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Bug: 268174392 (cherry picked from commit b912ed1363b363d587fe1aac9636ce9e9366a775) Signed-off-by: Greg Kroah-Hartman Change-Id: Iacfd7a5db15c417fd1f02c85e414e3137e8729ec --- io_uring/io-wq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 6031fb319d87..81b5e4462dba 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1217,6 +1217,7 @@ static void io_wq_cancel_tw_create(struct io_wq *wq) worker = container_of(cb, struct io_worker, create_work); io_worker_cancel_cb(worker); + kfree(worker); } } From 95a85d74bfb1baa764deb3be2cade573f72729d0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 8 Jan 2023 10:39:17 -0700 Subject: [PATCH 1449/1477] UPSTREAM: io_uring/io-wq: only free worker if it was allocated for creation commit e6db6f9398dadcbc06318a133d4c44a2d3844e61 upstream. We have two types of task_work based creation, one is using an existing worker to setup a new one (eg when going to sleep and we have no free workers), and the other is allocating a new worker. Only the latter should be freed when we cancel task_work creation for a new worker. 
Fixes: af82425c6a2d ("io_uring/io-wq: free worker if task_work creation is canceled") Reported-by: syzbot+d56ec896af3637bdb7e4@syzkaller.appspotmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Bug: 268174392 (cherry picked from commit a88a0d16e18f18d7221f0c5a1fdf487f5f9079e3) Signed-off-by: Greg Kroah-Hartman Change-Id: I75c9b22dce02151b2687cf90d6c5b74c08d0f04b --- io_uring/io-wq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 81b5e4462dba..87bc38b47103 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1217,7 +1217,12 @@ static void io_wq_cancel_tw_create(struct io_wq *wq) worker = container_of(cb, struct io_worker, create_work); io_worker_cancel_cb(worker); - kfree(worker); + /* + * Only the worker continuation helper has worker allocated and + * hence needs freeing. + */ + if (cb->func == create_worker_cont) + kfree(worker); } } From 786944d74f6bafea48ed17c112dc0c91ff45eb87 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 Jan 2023 20:50:24 -0700 Subject: [PATCH 1450/1477] UPSTREAM: io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL commit 46a525e199e4037516f7e498c18f065b09df32ac upstream. This isn't a reliable mechanism to tell if we have task_work pending, we really should be looking at whether we have any items queued. This is problematic if forward progress is gated on running said task_work. One such example is reading from a pipe, where the write side has been closed right before the read is started. The fput() of the file queues TWA_RESUME task_work, and we need that task_work to be run before ->release() is called for the pipe. If ->release() isn't called, then the read will sit forever waiting on data that will never arise. Fix this by changing io_run_task_work() so it checks if we have task_work pending rather than rely on TIF_NOTIFY_SIGNAL for that. The latter obviously doesn't work for task_work that is queued without TWA_SIGNAL.
Reported-by: Christiano Haesbaert Cc: stable@vger.kernel.org Link: https://github.com/axboe/liburing/issues/665 Change-Id: I042b07491afac06692639d91bdf7dd21a2405651 Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit 2fd232bbd66fafdf7af3cc155c7bb82d96d07bc9) Signed-off-by: Greg Kroah-Hartman --- io_uring/io-wq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 87bc38b47103..81485c1a9879 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -513,7 +513,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, static bool io_flush_signals(void) { - if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) { + if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) { __set_current_state(TASK_RUNNING); tracehook_notify_signal(); return true; From ad4691fb742d76b51af296c489cb7495652db4ac Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 23 Nov 2021 00:07:47 +0000 Subject: [PATCH 1451/1477] UPSTREAM: io_uring: improve send/recv error handling commit 7297ce3d59449de49d3c9e1f64ae25488750a1fc upstream. Hide all error handling under common if block, removes two extra ifs on the success path and keeps the handling more condensed. 
Change-Id: If6864c8ddd06bc853cef6b543fc06cf99d9ad147 Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/5761545158a12968f3caf30f747eea65ed75dfc1.1637524285.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit abdc16c8361b1420c1228068e0314b981a48bdcd) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 55 +++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5947c8140229..7672f2068be5 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -4716,17 +4716,18 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) min_ret = iov_iter_count(&kmsg->msg.msg_iter); ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); - if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) - return io_setup_async_msg(req, kmsg); - if (ret == -ERESTARTSYS) - ret = -EINTR; + if (ret < min_ret) { + if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) + return io_setup_async_msg(req, kmsg); + if (ret == -ERESTARTSYS) + ret = -EINTR; + req_set_fail(req); + } /* fast path, check for non-NULL to avoid function call */ if (kmsg->free_iov) kfree(kmsg->free_iov); req->flags &= ~REQ_F_NEED_CLEANUP; - if (ret < min_ret) - req_set_fail(req); __io_req_complete(req, issue_flags, ret, 0); return 0; } @@ -4762,13 +4763,13 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) msg.msg_flags = flags; ret = sock_sendmsg(sock, &msg); - if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) - return -EAGAIN; - if (ret == -ERESTARTSYS) - ret = -EINTR; - - if (ret < min_ret) + if (ret < min_ret) { + if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) + return -EAGAIN; + if (ret == -ERESTARTSYS) + ret = -EINTR; req_set_fail(req); + } __io_req_complete(req, issue_flags, ret, 0); return 0; } @@ -4955,10 +4956,15 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int 
issue_flags) ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg, kmsg->uaddr, flags); - if (force_nonblock && ret == -EAGAIN) - return io_setup_async_msg(req, kmsg); - if (ret == -ERESTARTSYS) - ret = -EINTR; + if (ret < min_ret) { + if (ret == -EAGAIN && force_nonblock) + return io_setup_async_msg(req, kmsg); + if (ret == -ERESTARTSYS) + ret = -EINTR; + req_set_fail(req); + } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { + req_set_fail(req); + } if (req->flags & REQ_F_BUFFER_SELECTED) cflags = io_put_recv_kbuf(req); @@ -4966,8 +4972,6 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) if (kmsg->free_iov) kfree(kmsg->free_iov); req->flags &= ~REQ_F_NEED_CLEANUP; - if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) - req_set_fail(req); __io_req_complete(req, issue_flags, ret, cflags); return 0; } @@ -5014,15 +5018,18 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) min_ret = iov_iter_count(&msg.msg_iter); ret = sock_recvmsg(sock, &msg, flags); - if (force_nonblock && ret == -EAGAIN) - return -EAGAIN; - if (ret == -ERESTARTSYS) - ret = -EINTR; out_free: + if (ret < min_ret) { + if (ret == -EAGAIN && force_nonblock) + return -EAGAIN; + if (ret == -ERESTARTSYS) + ret = -EINTR; + req_set_fail(req); + } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { + req_set_fail(req); + } if (req->flags & REQ_F_BUFFER_SELECTED) cflags = io_put_recv_kbuf(req); - if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) - req_set_fail(req); __io_req_complete(req, issue_flags, ret, cflags); return 0; } From 3a74ee4aa1c52cb574b6373d3feb935b06ad2c60 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 21 Jan 2023 10:21:22 -0700 Subject: [PATCH 1452/1477] UPSTREAM: io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly commit 7ba89d2af17aa879dda30f5d5d3f152e587fc551 upstream. 
We currently don't attempt to get the full asked for length even if MSG_WAITALL is set, if we get a partial receive. If we do see a partial receive, then just note how many bytes we did and return -EAGAIN to get it retried. The iov is advanced appropriately for the vector based case, and we manually bump the buffer and remainder for the non-vector case. Cc: stable@vger.kernel.org Reported-by: Constantine Gavrilov Change-Id: I618bde7c86b29f6053dd8cd19682f2916e57dd54 Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit aadd9b093018183c7895aa07395b786bc71c3d31) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 7672f2068be5..7855e557f9ae 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -588,6 +588,7 @@ struct io_sr_msg { int msg_flags; int bgid; size_t len; + size_t done_io; struct io_buffer *kbuf; }; @@ -4913,12 +4914,21 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->ctx->compat) sr->msg_flags |= MSG_CMSG_COMPAT; #endif + sr->done_io = 0; return 0; } +static bool io_net_retry(struct socket *sock, int flags) +{ + if (!(flags & MSG_WAITALL)) + return false; + return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; +} + static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr iomsg, *kmsg; + struct io_sr_msg *sr = &req->sr_msg; struct socket *sock; struct io_buffer *kbuf; unsigned flags; @@ -4961,6 +4971,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) return io_setup_async_msg(req, kmsg); if (ret == -ERESTARTSYS) ret = -EINTR; + if (ret > 0 && io_net_retry(sock, flags)) { + sr->done_io += ret; + return io_setup_async_msg(req, kmsg); + } req_set_fail(req); } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { req_set_fail(req); @@ 
-4972,6 +4986,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) if (kmsg->free_iov) kfree(kmsg->free_iov); req->flags &= ~REQ_F_NEED_CLEANUP; + if (ret >= 0) + ret += sr->done_io; + else if (sr->done_io) + ret = sr->done_io; __io_req_complete(req, issue_flags, ret, cflags); return 0; } @@ -5024,12 +5042,22 @@ out_free: return -EAGAIN; if (ret == -ERESTARTSYS) ret = -EINTR; + if (ret > 0 && io_net_retry(sock, flags)) { + sr->len -= ret; + sr->buf += ret; + sr->done_io += ret; + return -EAGAIN; + } req_set_fail(req); } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { req_set_fail(req); } if (req->flags & REQ_F_BUFFER_SELECTED) cflags = io_put_recv_kbuf(req); + if (ret >= 0) + ret += sr->done_io; + else if (sr->done_io) + ret = sr->done_io; __io_req_complete(req, issue_flags, ret, cflags); return 0; } From 1a9e93592acc316e6dbfb400589949257eb4a2d1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 23 Mar 2022 09:30:05 -0600 Subject: [PATCH 1453/1477] UPSTREAM: io_uring: add flag for disabling provided buffer recycling commit 8a3e8ee56417f5e0e66580d93941ed9d6f4c8274 upstream. If we need to continue doing this IO, then we don't want a potentially selected buffer recycled. Add a flag for that. Set this for recv/recvmsg if they do partial IO. 
Change-Id: If9381bd6a5695c8c85c7a51c3adccc0dc09f8999 Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit 96ccba4a1a59ae6bcfed405961dd399c74d0b2f8) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 7855e557f9ae..98948e4232a6 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -750,6 +750,7 @@ enum { REQ_F_CREDS_BIT, REQ_F_REFCOUNT_BIT, REQ_F_ARM_LTIMEOUT_BIT, + REQ_F_PARTIAL_IO_BIT, /* keep async read/write and isreg together and in order */ REQ_F_NOWAIT_READ_BIT, REQ_F_NOWAIT_WRITE_BIT, @@ -805,6 +806,8 @@ enum { REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), /* there is a linked timeout that has to be armed */ REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), + /* request has already done partial IO */ + REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), }; struct async_poll { @@ -4973,6 +4976,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) ret = -EINTR; if (ret > 0 && io_net_retry(sock, flags)) { sr->done_io += ret; + req->flags |= REQ_F_PARTIAL_IO; return io_setup_async_msg(req, kmsg); } req_set_fail(req); @@ -5046,6 +5050,7 @@ out_free: sr->len -= ret; sr->buf += ret; sr->done_io += ret; + req->flags |= REQ_F_PARTIAL_IO; return -EAGAIN; } req_set_fail(req); From e5daff2236fb31dc0cded5b30c408fb3d9b4c111 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Apr 2022 19:21:36 -0600 Subject: [PATCH 1454/1477] UPSTREAM: io_uring: support MSG_WAITALL for IORING_OP_SEND(MSG) commit 4c3c09439c08b03d9503df0ca4c7619c5842892e upstream. Like commit 7ba89d2af17a for recv/recvmsg, support MSG_WAITALL for the send side. If this flag is set and we do a short send, retry for a stream or seqpacket socket.
Change-Id: If67a4462576af1b683d53d2dc0d46e44c9dd8863 Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit f901b4bfd03ed234f72f28e27d965569531dc505) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 98948e4232a6..ec8f05f27260 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -4627,6 +4627,13 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags) } #if defined(CONFIG_NET) +static bool io_net_retry(struct socket *sock, int flags) +{ + if (!(flags & MSG_WAITALL)) + return false; + return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; +} + static int io_setup_async_msg(struct io_kiocb *req, struct io_async_msghdr *kmsg) { @@ -4690,12 +4697,14 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->ctx->compat) sr->msg_flags |= MSG_CMSG_COMPAT; #endif + sr->done_io = 0; return 0; } static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr iomsg, *kmsg; + struct io_sr_msg *sr = &req->sr_msg; struct socket *sock; unsigned flags; int min_ret = 0; @@ -4726,12 +4735,21 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) return io_setup_async_msg(req, kmsg); if (ret == -ERESTARTSYS) ret = -EINTR; + if (ret > 0 && io_net_retry(sock, flags)) { + sr->done_io += ret; + req->flags |= REQ_F_PARTIAL_IO; + return io_setup_async_msg(req, kmsg); + } req_set_fail(req); } /* fast path, check for non-NULL to avoid function call */ if (kmsg->free_iov) kfree(kmsg->free_iov); req->flags &= ~REQ_F_NEED_CLEANUP; + if (ret >= 0) + ret += sr->done_io; + else if (sr->done_io) + ret = sr->done_io; __io_req_complete(req, issue_flags, ret, 0); return 0; } @@ -4772,8 +4790,19 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) return -EAGAIN; 
if (ret == -ERESTARTSYS) ret = -EINTR; + if (ret > 0 && io_net_retry(sock, flags)) { + sr->len -= ret; + sr->buf += ret; + sr->done_io += ret; + req->flags |= REQ_F_PARTIAL_IO; + return -EAGAIN; + } req_set_fail(req); } + if (ret >= 0) + ret += sr->done_io; + else if (sr->done_io) + ret = sr->done_io; __io_req_complete(req, issue_flags, ret, 0); return 0; } @@ -4921,13 +4950,6 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static bool io_net_retry(struct socket *sock, int flags) -{ - if (!(flags & MSG_WAITALL)) - return false; - return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; -} - static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr iomsg, *kmsg; From 9d980b8ab001dbd07b857035bb6084821ffd5fce Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 21 Jan 2023 10:39:22 -0700 Subject: [PATCH 1455/1477] UPSTREAM: io_uring: allow re-poll if we made progress commit 10c873334febaeea9aa0c25c10b5ac0951b77a5f upstream. We currently check REQ_F_POLLED before arming async poll for a notification to retry. If it's set, then we don't allow poll and will punt to io-wq instead. This is done to prevent a situation where a buggy driver will repeatedly return that there's space/data available yet we get -EAGAIN. However, if we already transferred data, then it should be safe to rely on poll again. Gate the check on whether or not REQ_F_PARTIAL_IO is also set. 
Change-Id: I36b6d16ac43202fdf9ae5eea64f9dfbcfbe7fee5 Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit 4bc17e6381bdbe7373ffccbda94db56631061a4a) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ec8f05f27260..91a177a2e7da 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -5704,7 +5704,7 @@ static int io_arm_poll_handler(struct io_kiocb *req) if (!req->file || !file_can_poll(req->file)) return IO_APOLL_ABORTED; - if (req->flags & REQ_F_POLLED) + if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED) return IO_APOLL_ABORTED; if (!def->pollin && !def->pollout) return IO_APOLL_ABORTED; @@ -5720,7 +5720,10 @@ static int io_arm_poll_handler(struct io_kiocb *req) mask |= POLLOUT | POLLWRNORM; } - apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); + if (req->flags & REQ_F_POLLED) + apoll = req->apoll; + else + apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); if (unlikely(!apoll)) return IO_APOLL_ABORTED; apoll->double_poll = NULL; From 430d4109d2f5b562eac2ea259f753b4ba6c3b7c5 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Sat, 21 Jan 2023 09:13:12 -0700 Subject: [PATCH 1456/1477] UPSTREAM: io_uring: fix async accept on O_NONBLOCK sockets commit a73825ba70c93e1eb39a845bb3d9885a787f8ffe upstream. Do not set REQ_F_NOWAIT if the socket is non blocking. When enabled this causes the accept to immediately post a CQE with EAGAIN, which means you cannot perform an accept SQE on a NONBLOCK socket asynchronously. By removing the flag if there is no pending accept then poll is armed as usual and when a connection comes in the CQE is posted. 
Change-Id: I0fae3f75c7fbbf44f85da7d83f48c4cfed1fcae9 Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220324143435.2875844-1-dylany@fb.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit aa4c9b3e45e234bc011bd26a045f6bdfd4783001) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 91a177a2e7da..951a936b1c48 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -5122,9 +5122,6 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags) struct file *file; int ret, fd; - if (req->file->f_flags & O_NONBLOCK) - req->flags |= REQ_F_NOWAIT; - if (!fixed) { fd = __get_unused_fd_flags(accept->flags, accept->nofile); if (unlikely(fd < 0)) @@ -5137,6 +5134,8 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags) if (!fixed) put_unused_fd(fd); ret = PTR_ERR(file); + /* safe to retry */ + req->flags |= REQ_F_PARTIAL_IO; if (ret == -EAGAIN && force_nonblock) return -EAGAIN; if (ret == -ERESTARTSYS) From 293eda17dda4657e46c26e6d6b9e1bbf7b16de20 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Dec 2022 06:37:08 -0700 Subject: [PATCH 1457/1477] UPSTREAM: io_uring: check for valid register opcode earlier [ Upstream commit 343190841a1f22b96996d9f8cfab902a4d1bfd0e ] We only check the register opcode value inside the restricted ring section, move it into the main io_uring_register() function instead and check it up front. 
Change-Id: I4b5f782dad48eb0e7f04d5956cc087494e02b2ec Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit 78e8151f04f0aa0a5809aafea5de3719c2360033) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 951a936b1c48..2309fdcc7b52 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -10815,8 +10815,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, return -ENXIO; if (ctx->restricted) { - if (opcode >= IORING_REGISTER_LAST) - return -EINVAL; opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); if (!test_bit(opcode, ctx->restrictions.register_op)) return -EACCES; @@ -10948,6 +10946,9 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, long ret = -EBADF; struct fd f; + if (opcode >= IORING_REGISTER_LAST) + return -EINVAL; + f = fdget(fd); if (!f.file) return -EBADF; From 662d1ad94d6604917d2667956771204e69a63ec2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 14 Jan 2023 09:14:03 -0700 Subject: [PATCH 1458/1477] UPSTREAM: io_uring: lock overflowing for IOPOLL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 544d163d659d45a206d8929370d5a2984e546cb7 upstream. 
syzbot reports an issue with overflow filling for IOPOLL: WARNING: CPU: 0 PID: 28 at io_uring/io_uring.c:734 io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734 CPU: 0 PID: 28 Comm: kworker/u4:1 Not tainted 6.2.0-rc3-syzkaller-16369-g358a161a6a9e #0 Workqueue: events_unbound io_ring_exit_work Call trace:  io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734  io_req_cqe_overflow+0x5c/0x70 io_uring/io_uring.c:773  io_fill_cqe_req io_uring/io_uring.h:168 [inline]  io_do_iopoll+0x474/0x62c io_uring/rw.c:1065  io_iopoll_try_reap_events+0x6c/0x108 io_uring/io_uring.c:1513  io_uring_try_cancel_requests+0x13c/0x258 io_uring/io_uring.c:3056  io_ring_exit_work+0xec/0x390 io_uring/io_uring.c:2869  process_one_work+0x2d8/0x504 kernel/workqueue.c:2289  worker_thread+0x340/0x610 kernel/workqueue.c:2436  kthread+0x12c/0x158 kernel/kthread.c:376  ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:863 There is no real problem for normal IOPOLL as flush is also called with uring_lock taken, but it's getting more complicated for IOPOLL|SQPOLL, for which __io_cqring_overflow_flush() happens from the CQ waiting path. 
Reported-and-tested-by: syzbot+6805087452d72929404e@syzkaller.appspotmail.com Cc: stable@vger.kernel.org # 5.10+ Change-Id: I3449b2ea1b71ff2f04f119741751b42870386923 Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit de77faee280163ff03b7ab64af6c9d779a43d4c4) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 2309fdcc7b52..35b877bd636f 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2492,12 +2492,26 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, io_init_req_batch(&rb); while (!list_empty(done)) { + struct io_uring_cqe *cqe; + unsigned cflags; + req = list_first_entry(done, struct io_kiocb, inflight_entry); list_del(&req->inflight_entry); - - io_fill_cqe_req(req, req->result, io_put_rw_kbuf(req)); + cflags = io_put_rw_kbuf(req); (*nr_events)++; + cqe = io_get_cqe(ctx); + if (cqe) { + WRITE_ONCE(cqe->user_data, req->user_data); + WRITE_ONCE(cqe->res, req->result); + WRITE_ONCE(cqe->flags, cflags); + } else { + spin_lock(&ctx->completion_lock); + io_cqring_event_overflow(ctx, req->user_data, + req->result, cflags); + spin_unlock(&ctx->completion_lock); + } + if (req_ref_put_and_test(req)) io_req_free_batch(&rb, req, &ctx->submit_state); } From 7a8dd2f210727120da6699b8691692983250bf4a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 5 Jan 2023 10:49:15 +0000 Subject: [PATCH 1459/1477] UPSTREAM: io_uring: fix CQ waiting timeout handling commit 12521a5d5cb7ff0ad43eadfc9c135d86e1131fa8 upstream. Jiffy to ktime CQ waiting conversion broke how we treat timeouts, in particular we rearm it anew every time we get into io_cqring_wait_schedule() without adjusting the timeout. 
Waiting for 2 CQEs and getting a task_work in the middle may double the timeout value, or even worse in some cases task may wait indefinitely. Cc: stable@vger.kernel.org Fixes: 228339662b398 ("io_uring: don't convert to jiffies for waiting on timeouts") Change-Id: If8605a13266ae2b49b1f7d7cd5ee092f9ffd2805 Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f7bffddd71b08f28a877d44d37ac953ddb01590d.1672915663.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit e0140e9da31e1f89baca5f175e073e8aed92688f) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 35b877bd636f..07ad8347f980 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -7528,7 +7528,7 @@ static int io_run_task_work_sig(void) /* when returns >0, the caller should retry */ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - ktime_t timeout) + ktime_t *timeout) { int ret; @@ -7540,7 +7540,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, if (test_bit(0, &ctx->check_cq_overflow)) return 1; - if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) + if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS)) return -ETIME; return 1; } @@ -7603,7 +7603,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, TASK_INTERRUPTIBLE); - ret = io_cqring_wait_schedule(ctx, &iowq, timeout); + ret = io_cqring_wait_schedule(ctx, &iowq, &timeout); finish_wait(&ctx->cq_wait, &iowq.wq); cond_resched(); } while (ret > 0); From 0edbe68c5eed2ab97aa855e32a99e8bd40f4fa60 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 21 Jan 2023 12:36:08 -0700 Subject: [PATCH 1460/1477] UPSTREAM: io_uring: ensure that cached task references are always put on exit commit 
e775f93f2ab976a2cdb4a7b53063cbe890904f73 upstream. io_uring caches task references to avoid doing atomics for each of them per request. If a request is put from the same task that allocated it, then we can maintain a per-ctx cache of them. This obviously relies on io_uring always pruning caches in a reliable way, and there's currently a case of io_uring fd release where we can miss that. One example is a ring setup with IOPOLL, which relies on the task polling for completions, which will free them. However, if such a task submits a request and then exits or closes the ring without reaping the completion, then ring release will reap and put. If release happens from that very same task, the completed request task refs will get put back into the cache pool. This is problematic, as we're now beyond the point of pruning caches. Manually drop these caches after doing an IOPOLL reap. This releases references from the current task, which is enough. If another task happens to be doing the release, then the caching will not be triggered and there's no issue.
Cc: stable@vger.kernel.org Fixes: e98e49b2bbf7 ("io_uring: extend task put optimisations") Reported-by: Homin Rhee Change-Id: I9495121af065424141fa9c39840ab9aa91f45c72 Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit e9c6556708d3b1f77f1a9e08881f8bb01e98e919) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 07ad8347f980..713a2436feef 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -9523,6 +9523,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) /* if we failed setting up the ctx, we might not have any rings */ io_iopoll_try_reap_events(ctx); + /* drop cached put refs after potentially doing completions */ + if (current->io_uring) + io_uring_drop_tctx_refs(current); + INIT_WORK(&ctx->exit_work, io_ring_exit_work); /* * Use system_unbound_wq to avoid spawning tons of event kworkers From f568c7ae55b1d81806cd23a93ef268dc94ed4a1c Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Tue, 22 Feb 2022 02:55:01 -0800 Subject: [PATCH 1461/1477] UPSTREAM: io_uring: remove duplicated calls to io_kiocb_ppos commit af9c45ecebaf1b428306f41421f4bcffe439f735 upstream. io_kiocb_ppos is called in both branches, and it seems that the compiler does not fuse this. Fusing removes a few bytes from loop_rw_iter. 
Before: $ nm -S fs/io_uring.o | grep loop_rw_iter 0000000000002430 0000000000000124 t loop_rw_iter After: $ nm -S fs/io_uring.o | grep loop_rw_iter 0000000000002430 000000000000010d t loop_rw_iter Change-Id: Ibd662d59697d9cb1e484319050f6e5f960f6ac5c Signed-off-by: Dylan Yudaken Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit 076f872314d4da36e878673cbef1eae3514b0c6d) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 713a2436feef..7b26c0c08375 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3310,6 +3310,7 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) struct kiocb *kiocb = &req->rw.kiocb; struct file *file = req->file; ssize_t ret = 0; + loff_t *ppos; /* * Don't support polled IO through this interface, and we can't @@ -3321,6 +3322,8 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) if (kiocb->ki_flags & IOCB_NOWAIT) return -EAGAIN; + ppos = io_kiocb_ppos(kiocb); + while (iov_iter_count(iter)) { struct iovec iovec; ssize_t nr; @@ -3334,10 +3337,10 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) if (rw == READ) { nr = file->f_op->read(file, iovec.iov_base, - iovec.iov_len, io_kiocb_ppos(kiocb)); + iovec.iov_len, ppos); } else { nr = file->f_op->write(file, iovec.iov_base, - iovec.iov_len, io_kiocb_ppos(kiocb)); + iovec.iov_len, ppos); } if (nr < 0) { From 0813679ea31ceba96d4ecdb50efd9348d92c4c2c Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Tue, 22 Feb 2022 02:55:02 -0800 Subject: [PATCH 1462/1477] UPSTREAM: io_uring: update kiocb->ki_pos at execution time commit d34e1e5b396a0dbaa4a29b7138df662cfb9d8e8e upstream. Update kiocb->ki_pos at execution time rather than in io_prep_rw(). 
io_prep_rw() happens before the job is enqueued to a worker and so the offset might be read multiple times before being executed once. Ensures that the file position in a set of _linked_ SQEs will be only obtained after earlier SQEs have completed, and so will include their incremented file position. Change-Id: I3c5abbf6a337ec1958fd6600c5feb44fb61a5772 Signed-off-by: Dylan Yudaken Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit ea528ecac3ae58d8197871b29748eefca7d46be8) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 7b26c0c08375..ad5860309f0d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2929,14 +2929,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, req->flags |= REQ_F_ISREG; kiocb->ki_pos = READ_ONCE(sqe->off); - if (kiocb->ki_pos == -1) { - if (!(file->f_mode & FMODE_STREAM)) { - req->flags |= REQ_F_CUR_POS; - kiocb->ki_pos = file->f_pos; - } else { - kiocb->ki_pos = 0; - } - } kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); kiocb->ki_flags = iocb_flags(kiocb->ki_filp); ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); @@ -3018,6 +3010,20 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) } } +static inline void io_kiocb_update_pos(struct io_kiocb *req) +{ + struct kiocb *kiocb = &req->rw.kiocb; + + if (kiocb->ki_pos == -1) { + if (!(req->file->f_mode & FMODE_STREAM)) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = req->file->f_pos; + } else { + kiocb->ki_pos = 0; + } + } +} + static void kiocb_done(struct kiocb *kiocb, ssize_t ret, unsigned int issue_flags) { @@ -3573,6 +3579,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) return ret ?: -EAGAIN; } + io_kiocb_update_pos(req); + ret = rw_verify_area(READ, req->file, 
io_kiocb_ppos(kiocb), req->result); if (unlikely(ret)) { kfree(iovec); @@ -3707,6 +3715,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) (req->flags & REQ_F_ISREG)) goto copy_iov; + io_kiocb_update_pos(req); + ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result); if (unlikely(ret)) goto out_free; From 19f09203485e6c6eabd60f7851253b5ffe08c245 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Tue, 22 Feb 2022 02:55:03 -0800 Subject: [PATCH 1463/1477] UPSTREAM: io_uring: do not recalculate ppos unnecessarily commit b4aec40015953b65f2f114641e7fd7714c8df8e6 upstream. There is a slight optimisation to be had by calculating the correct pos pointer inside io_kiocb_update_pos and then using that later. It seems code size drops by a bit: 000000000000a1b0 0000000000000400 t io_read 000000000000a5b0 0000000000000319 t io_write vs 000000000000a1b0 00000000000003f6 t io_read 000000000000a5b0 0000000000000310 t io_write Change-Id: I19d8cdb6ea88d8fc4625e521363d5a8f638dfdcb Signed-off-by: Dylan Yudaken Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit e90cfb96999454b6d3f469f3a9414cf7f9df62fe) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ad5860309f0d..a4941265a9b7 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3010,18 +3010,22 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) } } -static inline void io_kiocb_update_pos(struct io_kiocb *req) +static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) { struct kiocb *kiocb = &req->rw.kiocb; + bool is_stream = req->file->f_mode & FMODE_STREAM; if (kiocb->ki_pos == -1) { - if (!(req->file->f_mode & FMODE_STREAM)) { + if (!is_stream) { req->flags |= REQ_F_CUR_POS; kiocb->ki_pos = req->file->f_pos; + return &kiocb->ki_pos; } else { 
kiocb->ki_pos = 0; + return NULL; } } + return is_stream ? NULL : &kiocb->ki_pos; } static void kiocb_done(struct kiocb *kiocb, ssize_t ret, @@ -3547,6 +3551,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; struct iov_iter_state __state, *state; ssize_t ret, ret2; + loff_t *ppos; if (rw) { iter = &rw->iter; @@ -3579,9 +3584,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) return ret ?: -EAGAIN; } - io_kiocb_update_pos(req); + ppos = io_kiocb_update_pos(req); - ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result); + ret = rw_verify_area(READ, req->file, ppos, req->result); if (unlikely(ret)) { kfree(iovec); return ret; @@ -3685,6 +3690,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; struct iov_iter_state __state, *state; ssize_t ret, ret2; + loff_t *ppos; if (rw) { iter = &rw->iter; @@ -3715,9 +3721,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) (req->flags & REQ_F_ISREG)) goto copy_iov; - io_kiocb_update_pos(req); + ppos = io_kiocb_update_pos(req); - ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result); + ret = rw_verify_area(WRITE, req->file, ppos, req->result); if (unlikely(ret)) goto out_free; From 30fe79d39ab459eb7416ad844d8cccc12919ee71 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 21 Jan 2023 13:38:51 -0700 Subject: [PATCH 1464/1477] UPSTREAM: io_uring/rw: defer fsnotify calls to task context commit b000145e9907809406d8164c3b2b8861d95aecd1 upstream. We can't call these off the kiocb completion as that might be off soft/hard irq context. Defer the calls to when we process the task_work for this request. 
That avoids valid complaints like: stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.0.0-rc6-syzkaller-00321-g105a36f3694e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_usage_bug kernel/locking/lockdep.c:3961 [inline] valid_state kernel/locking/lockdep.c:3973 [inline] mark_lock_irq kernel/locking/lockdep.c:4176 [inline] mark_lock.part.0.cold+0x18/0xd8 kernel/locking/lockdep.c:4632 mark_lock kernel/locking/lockdep.c:4596 [inline] mark_usage kernel/locking/lockdep.c:4527 [inline] __lock_acquire+0x11d9/0x56d0 kernel/locking/lockdep.c:5007 lock_acquire kernel/locking/lockdep.c:5666 [inline] lock_acquire+0x1ab/0x570 kernel/locking/lockdep.c:5631 __fs_reclaim_acquire mm/page_alloc.c:4674 [inline] fs_reclaim_acquire+0x115/0x160 mm/page_alloc.c:4688 might_alloc include/linux/sched/mm.h:271 [inline] slab_pre_alloc_hook mm/slab.h:700 [inline] slab_alloc mm/slab.c:3278 [inline] __kmem_cache_alloc_lru mm/slab.c:3471 [inline] kmem_cache_alloc+0x39/0x520 mm/slab.c:3491 fanotify_alloc_fid_event fs/notify/fanotify/fanotify.c:580 [inline] fanotify_alloc_event fs/notify/fanotify/fanotify.c:813 [inline] fanotify_handle_event+0x1130/0x3f40 fs/notify/fanotify/fanotify.c:948 send_to_group fs/notify/fsnotify.c:360 [inline] fsnotify+0xafb/0x1680 fs/notify/fsnotify.c:570 __fsnotify_parent+0x62f/0xa60 fs/notify/fsnotify.c:230 fsnotify_parent include/linux/fsnotify.h:77 [inline] fsnotify_file include/linux/fsnotify.h:99 [inline] fsnotify_access include/linux/fsnotify.h:309 [inline] __io_complete_rw_common+0x485/0x720 io_uring/rw.c:195 io_complete_rw+0x1a/0x1f0 io_uring/rw.c:228 iomap_dio_complete_work fs/iomap/direct-io.c:144 [inline] iomap_dio_bio_end_io+0x438/0x5e0 fs/iomap/direct-io.c:178 bio_endio+0x5f9/0x780 block/bio.c:1564 req_bio_endio block/blk-mq.c:695 [inline] blk_update_request+0x3fc/0x1300 block/blk-mq.c:825 
scsi_end_request+0x7a/0x9a0 drivers/scsi/scsi_lib.c:541 scsi_io_completion+0x173/0x1f70 drivers/scsi/scsi_lib.c:971 scsi_complete+0x122/0x3b0 drivers/scsi/scsi_lib.c:1438 blk_complete_reqs+0xad/0xe0 block/blk-mq.c:1022 __do_softirq+0x1d3/0x9c6 kernel/softirq.c:571 invoke_softirq kernel/softirq.c:445 [inline] __irq_exit_rcu+0x123/0x180 kernel/softirq.c:650 irq_exit_rcu+0x5/0x20 kernel/softirq.c:662 common_interrupt+0xa9/0xc0 arch/x86/kernel/irq.c:240 Fixes: f63cf5192fe3 ("io_uring: ensure that fsnotify is always called") Link: https://lore.kernel.org/all/20220929135627.ykivmdks2w5vzrwg@quack3/ Reported-by: syzbot+dfcc5f4da15868df7d4d@syzkaller.appspotmail.com Reported-by: Jan Kara Change-Id: Ia16078bdf53c6b2536cacb7aafa03a4ec1079a94 Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Bug: 268174392 (cherry picked from commit ea2e6286e3e89a115ae554e20ba9aec2b2e1ddff) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index a4941265a9b7..06267fb28546 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2712,12 +2712,6 @@ static bool io_rw_should_reissue(struct io_kiocb *req) static bool __io_complete_rw_common(struct io_kiocb *req, long res) { - if (req->rw.kiocb.ki_flags & IOCB_WRITE) { - kiocb_end_write(req); - fsnotify_modify(req->file); - } else { - fsnotify_access(req->file); - } if (res != req->result) { if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) { @@ -2770,6 +2764,20 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2, __io_req_complete(req, issue_flags, io_fixup_rw_res(req, res), io_put_rw_kbuf(req)); } +static void io_req_rw_complete(struct io_kiocb *req, bool *locked) +{ + struct io_rw *rw = &req->rw; + + if (rw->kiocb.ki_flags & IOCB_WRITE) { + kiocb_end_write(req); + fsnotify_modify(req->file); + } else { + fsnotify_access(req->file); + } + + 
io_req_task_complete(req, locked); +} + static void io_complete_rw(struct kiocb *kiocb, long res, long res2) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); @@ -2777,7 +2785,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) if (__io_complete_rw_common(req, res)) return; req->result = io_fixup_rw_res(req, res); - req->io_task_work.func = io_req_task_complete; + req->io_task_work.func = io_req_rw_complete; io_req_task_work_add(req); } From c7e0a6d8bfed46cee4b12166b3ad5744d5279c7b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 11 Apr 2022 09:48:30 -0600 Subject: [PATCH 1465/1477] UPSTREAM: io_uring: io_kiocb_update_pos() should not touch file for non -1 offset commit 6f83ab22adcb77a5824d2c274dace0d99e21319f upstream. -1 tells us to use the current position, but we check if the file is a stream regardless of that. Fix up io_kiocb_update_pos() to only dip into file if we need to. This is both more efficient and also drops 12 bytes of text on aarch64 and 64 bytes on x86-64. 
Fixes: b4aec4001595 ("io_uring: do not recalculate ppos unnecessarily") Change-Id: I5c22ce8122b0e1f0ad423a5b3aa520ee416feff1 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Bug: 268174392 (cherry picked from commit 89a77271d254752d83507fc347dc2d675885fe07) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 06267fb28546..d758427877e3 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3021,19 +3021,18 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) { struct kiocb *kiocb = &req->rw.kiocb; - bool is_stream = req->file->f_mode & FMODE_STREAM; - if (kiocb->ki_pos == -1) { - if (!is_stream) { - req->flags |= REQ_F_CUR_POS; - kiocb->ki_pos = req->file->f_pos; - return &kiocb->ki_pos; - } else { - kiocb->ki_pos = 0; - return NULL; - } + if (kiocb->ki_pos != -1) + return &kiocb->ki_pos; + + if (!(req->file->f_mode & FMODE_STREAM)) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = req->file->f_pos; + return &kiocb->ki_pos; } - return is_stream ? NULL : &kiocb->ki_pos; + + kiocb->ki_pos = 0; + return NULL; } static void kiocb_done(struct kiocb *kiocb, ssize_t ret, From 848284d40dda028f67fa5ccd86be30b714cfedb8 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 29 Sep 2022 09:39:10 +0200 Subject: [PATCH 1466/1477] UPSTREAM: io_uring/net: fix fast_iov assignment in io_setup_async_msg() commit 3e4cb6ebbb2bad201c1186bc0b7e8cf41dd7f7e6 upstream. I hit a very bad problem during my tests of SENDMSG_ZC. BUG(); in first_iovec_segment() triggered very easily. The problem was io_setup_async_msg() in the partial retry case, which seems to happen more often with _ZC. iov_iter_iovec_advance() may change i->iov in order to have i->iov_offset being only relative to the first element. 
Which means kmsg->msg.msg_iter.iov is no longer the same as kmsg->fast_iov. But this would rewind the copy to be the start of async_msg->fast_iov, which means the internal state of async_msg->msg.msg_iter is inconsistent. I tested with 5 vectors with length like this 4, 0, 64, 20, 8388608 and got short writes with: - ret=2675244 min_ret=8388692 => remaining 5713448 sr->done_io=2675244 - ret=-EAGAIN => io_uring_poll_arm - ret=4911225 min_ret=5713448 => remaining 802223 sr->done_io=7586469 - ret=-EAGAIN => io_uring_poll_arm - ret=802223 min_ret=802223 => res=8388692 While this was easily triggered with SENDMSG_ZC (queued for 6.1), it was a potential problem starting with 7ba89d2af17aa879dda30f5d5d3f152e587fc551 in 5.18 for IORING_OP_RECVMSG. And also with 4c3c09439c08b03d9503df0ca4c7619c5842892e in 5.19 for IORING_OP_SENDMSG. However 257e84a5377fbbc336ff563833a8712619acce56 introduced the critical code into io_setup_async_msg() in 5.11. Fixes: 7ba89d2af17aa ("io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly") Fixes: 257e84a5377fb ("io_uring: refactor sendmsg/recvmsg iov managing") Cc: stable@vger.kernel.org Change-Id: I72c459fdbae2938d176126ed2f17eea990c42d49 Signed-off-by: Stefan Metzmacher Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/b2e7be246e2fb173520862b0c7098e55767567a2.1664436949.git.metze@samba.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Bug: 268174392 (cherry picked from commit fc2491562a9bed1f3e58a8ebdd4176962640e58c) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index d758427877e3..9c3acfdc10b0 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -4691,8 +4691,10 @@ static int io_setup_async_msg(struct io_kiocb *req, if (async_msg->msg.msg_name) async_msg->msg.msg_name = &async_msg->addr; /* if were using fast_iov, set it to the new one */ - if (!async_msg->free_iov) - 
async_msg->msg.msg_iter.iov = async_msg->fast_iov; + if (!kmsg->free_iov) { + size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov; + async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx]; + } return -EAGAIN; } From 4faf0616b756b449561084622dee418199c18a8f Mon Sep 17 00:00:00 2001 From: Alviro Iskandar Setiawan Date: Mon, 7 Feb 2022 21:05:33 +0700 Subject: [PATCH 1467/1477] UPSTREAM: io_uring: Clean up a false-positive warning from GCC 9.3.0 commit 0d7c1153d9291197c1dc473cfaade77acb874b4b upstream. In io_recv(), if import_single_range() fails, the @flags variable is uninitialized, then it will goto out_free. After the goto, the compiler doesn't know that (ret < min_ret) is always true, so it thinks the "if ((flags & MSG_WAITALL) ..." path could be taken. The complaint comes from gcc-9 (Debian 9.3.0-22) 9.3.0: ``` fs/io_uring.c:5238 io_recvfrom() error: uninitialized symbol 'flags' ``` Fix this by bypassing the @ret and @flags check when import_single_range() fails. Reasons: 1. import_single_range() only returns -EFAULT when it fails. 2. At that point, @flags is uninitialized and shouldn't be read. 
Reported-by: kernel test robot Reported-by: Dan Carpenter Reported-by: "Chen, Rong A" Link: https://lore.gnuweeb.org/timl/d33bb5a9-8173-f65b-f653-51fc0681c6d6@intel.com/ Cc: Pavel Begunkov Suggested-by: Ammar Faizi Fixes: 7297ce3d59449de49d3c9e1f64ae25488750a1fc ("io_uring: improve send/recv error handling") Change-Id: Ifd324b3213a0682dae11177c93a5b8272516c2cd Signed-off-by: Alviro Iskandar Setiawan Signed-off-by: Ammar Faizi Link: https://lore.kernel.org/r/20220207140533.565411-1-ammarfaizi2@gnuweeb.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Bug: 268174392 (cherry picked from commit ddaaadf22bea31cf7e94d55c6530ce75947bb1cc) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 9c3acfdc10b0..0eed453fd469 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -5104,7 +5104,6 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) min_ret = iov_iter_count(&msg.msg_iter); ret = sock_recvmsg(sock, &msg, flags); -out_free: if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) return -EAGAIN; @@ -5119,6 +5118,7 @@ out_free: } req_set_fail(req); } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { +out_free: req_set_fail(req); } if (req->flags & REQ_F_BUFFER_SELECTED) From 138d120fb254802336a68641cd2fe97deb720da2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 22 Jan 2023 10:24:20 -0700 Subject: [PATCH 1468/1477] UPSTREAM: io_uring: fix double poll leak on repolling commit c0737fa9a5a5cf5a053bcc983f72d58919b997c6 upstream. We have re-polling for partial IO, so a request can be polled twice. If it used two poll entries the first time then on the second io_arm_poll_handler() it will find the old apoll entry and NULL kmalloc()'ed second entry, i.e. apoll->double_poll, so leaking it. 
Fixes: 10c873334feba ("io_uring: allow re-poll if we made progress") Change-Id: If720ddcd7e488c0c336533d8dde00864689648ca Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/fee2452494222ecc7f1f88c8fb659baef971414a.1655852245.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Bug: 268174392 (cherry picked from commit c1a279d79e313bd9b0ed31025edc68394bfc40ab) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 0eed453fd469..9ef72355f5c1 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -5761,10 +5761,12 @@ static int io_arm_poll_handler(struct io_kiocb *req) mask |= POLLOUT | POLLWRNORM; } - if (req->flags & REQ_F_POLLED) + if (req->flags & REQ_F_POLLED) { apoll = req->apoll; - else + kfree(apoll->double_poll); + } else { apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); + } if (unlikely(!apoll)) return IO_APOLL_ABORTED; apoll->double_poll = NULL; From d419b1b05caefbe6356a344043a9f20f68567470 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 22 Jan 2023 10:36:37 -0700 Subject: [PATCH 1469/1477] UPSTREAM: io_uring/rw: ensure kiocb_end_write() is always called commit 2ec33a6c3cca9fe2465e82050c81f5ffdc508b36 upstream. A previous commit moved the notifications and end-write handling, but it is now missing a few spots where we also want to call both of those. Without that, we can potentially be missing file notifications, and more importantly, have an imbalance in the super_block writers sem accounting. 
Fixes: b000145e9907 ("io_uring/rw: defer fsnotify calls to task context") Reported-by: Dave Chinner Link: https://lore.kernel.org/all/20221010050319.GC2703033@dread.disaster.area/ Change-Id: Iaaa509f5dadcae04f58c929901225bc968b35d52 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Bug: 268174392 (cherry picked from commit 3d5f181bda25112c4d5d10c1930a59787868e032) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 57 +++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 9ef72355f5c1..d1a079b7f1ca 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2710,11 +2710,34 @@ static bool io_rw_should_reissue(struct io_kiocb *req) } #endif +/* + * Trigger the notifications after having done some IO, and finish the write + * accounting, if any. + */ +static void io_req_io_end(struct io_kiocb *req) +{ + struct io_rw *rw = &req->rw; + + WARN_ON(!in_task()); + + if (rw->kiocb.ki_flags & IOCB_WRITE) { + kiocb_end_write(req); + fsnotify_modify(req->file); + } else { + fsnotify_access(req->file); + } +} + static bool __io_complete_rw_common(struct io_kiocb *req, long res) { if (res != req->result) { if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) { + /* + * Reissue will start accounting again, finish the + * current cycle. 
+ */ + io_req_io_end(req); req->flags |= REQ_F_REISSUE; return true; } @@ -2756,25 +2779,9 @@ static void io_req_task_complete(struct io_kiocb *req, bool *locked) } } -static void __io_complete_rw(struct io_kiocb *req, long res, long res2, - unsigned int issue_flags) -{ - if (__io_complete_rw_common(req, res)) - return; - __io_req_complete(req, issue_flags, io_fixup_rw_res(req, res), io_put_rw_kbuf(req)); -} - static void io_req_rw_complete(struct io_kiocb *req, bool *locked) { - struct io_rw *rw = &req->rw; - - if (rw->kiocb.ki_flags & IOCB_WRITE) { - kiocb_end_write(req); - fsnotify_modify(req->file); - } else { - fsnotify_access(req->file); - } - + io_req_io_end(req); io_req_task_complete(req, locked); } @@ -3042,10 +3049,20 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, if (req->flags & REQ_F_CUR_POS) req->file->f_pos = kiocb->ki_pos; - if (ret >= 0 && (kiocb->ki_complete == io_complete_rw)) - __io_complete_rw(req, ret, 0, issue_flags); - else + if (ret >= 0 && (kiocb->ki_complete == io_complete_rw)) { + if (!__io_complete_rw_common(req, ret)) { + /* + * Safe to call io_end from here as we're inline + * from the submission path. + */ + io_req_io_end(req); + __io_req_complete(req, issue_flags, + io_fixup_rw_res(req, ret), + io_put_rw_kbuf(req)); + } + } else { io_rw_done(kiocb, ret); + } if (req->flags & REQ_F_REISSUE) { req->flags &= ~REQ_F_REISSUE; From fb53217659a0f369fa9441111e9760ad9e41e760 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 16 Oct 2022 17:24:10 -0600 Subject: [PATCH 1470/1477] UPSTREAM: io_uring/rw: remove leftover debug statement commit 5c61795ea97c170347c5c4af0c159bd877b8af71 upstream. This debug statement was never meant to go into the upstream release, kill it off before it ends up in a release. It was just part of the testing for the initial version of the patch. 
Fixes: 2ec33a6c3cca ("io_uring/rw: ensure kiocb_end_write() is always called") Change-Id: Iee9f436c34cc137a7ab934aafa3aa0c584369418 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Bug: 268174392 (cherry picked from commit e699cce29accb713200ba94c88c5b98e2ccf4b75) Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index d1a079b7f1ca..2be25b6fe65a 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2718,8 +2718,6 @@ static void io_req_io_end(struct io_kiocb *req) { struct io_rw *rw = &req->rw; - WARN_ON(!in_task()); - if (rw->kiocb.ki_flags & IOCB_WRITE) { kiocb_end_write(req); fsnotify_modify(req->file); From fcd1e26ae038aea25a43f2414c9b7e3824105b1c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 4 Nov 2022 19:51:43 +0000 Subject: [PATCH 1471/1477] BACKPORT: iommu: Avoid races around device probe We currently have 3 different ways that __iommu_probe_device() may be called, but no real guarantee that multiple callers can't tread on each other, especially once asynchronous driver probe gets involved. It would likely have taken a fair bit of luck to hit this previously, but commit 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") ups the odds since now it's not just omap-iommu that may trigger multiple bus_iommu_probe() calls in parallel if probing asynchronously. Add a lock to ensure we can't try to double-probe a device, and also close some possible race windows to make sure we're truly robust against trying to double-initialise a group via two different member devices. 
Reported-by: Brian Norris Signed-off-by: Robin Murphy Tested-by: Brian Norris Fixes: 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") Link: https://lore.kernel.org/r/1946ef9f774851732eed78760a78ec40dbc6d178.1667591503.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Bug: 270248437 Bug: 269232600 (cherry picked from commit 01657bc14a3990c665375f77978631fee77b1fce) Change-Id: Ie87f8f7a7b90431c3a2682923961885ce7b239f3 Signed-off-by: Zhenhua Huang Signed-off-by: Srinivasarao Pathipati --- drivers/iommu/iommu.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index ed6902eb8d91..9bf7042768a4 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -201,13 +201,23 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_device *iommu_dev; struct iommu_group *group; + static DEFINE_MUTEX(iommu_probe_device_lock); int ret; if (!ops) return -ENODEV; - - if (!dev_iommu_get(dev)) - return -ENOMEM; + /* + * Serialise to avoid races between IOMMU drivers registering in + * parallel and/or the "replay" calls from ACPI/OF code via client + * driver probe. Once the latter have been cleaned up we should + * probably be able to use device_lock() here to minimise the scope, + * but for now enforcing a simple global ordering is fine. 
+ */ + mutex_lock(&iommu_probe_device_lock); + if (!dev_iommu_get(dev)) { + ret = -ENOMEM; + goto err_unlock; + } if (!try_module_get(ops->owner)) { ret = -EINVAL; @@ -227,11 +237,14 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list ret = PTR_ERR(group); goto out_release; } - iommu_group_put(group); + mutex_lock(&group->mutex); if (group_list && !group->default_domain && list_empty(&group->entry)) list_add_tail(&group->entry, group_list); + mutex_unlock(&group->mutex); + iommu_group_put(group); + mutex_unlock(&iommu_probe_device_lock); iommu_device_link(iommu_dev, dev); return 0; @@ -245,6 +258,9 @@ out_module_put: err_free: dev_iommu_free(dev); +err_unlock: + mutex_unlock(&iommu_probe_device_lock); + return ret; } @@ -1766,11 +1782,11 @@ int bus_iommu_probe(struct bus_type *bus) return ret; list_for_each_entry_safe(group, next, &group_list, entry) { + mutex_lock(&group->mutex); + /* Remove item from the list */ list_del_init(&group->entry); - mutex_lock(&group->mutex); - /* Try to allocate default domain */ probe_alloc_default_domain(bus, group); From 8290f9275b36efd581fa436b584bf03971c06606 Mon Sep 17 00:00:00 2001 From: Sarannya S Date: Fri, 17 Feb 2023 12:26:24 +0530 Subject: [PATCH 1472/1477] ANDROID: ABI: Update allowed list for QCOM Add __xa_alloc_cyclic to the symbol list. This function will be used in qrtr for cyclic port id allocation. 
Leaf changes summary: 1 artifact changed (2 filtered out) Changed leaf types summary: 0 (1 filtered out) leaf type changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 1 Added function Removed/Changed/Added variables summary: 0 Removed, 0 Changed (1 filtered out), 0 Added variable 1 Added function: [A] 'function int __xa_alloc_cyclic(xarray*, u32*, void*, xa_limit, u32*, gfp_t)' Bug: 270248437 Bug: 269687040 Change-Id: I9543ad1320e79daa781f0075decd74612bc694a1 Signed-off-by: Sarannya S (cherry picked from commit 3bdc72479dd286d5f0bb6b8f60f02c4c0cb7273d) --- android/abi_gki_aarch64.xml | 10 ++++++++++ android/abi_gki_aarch64_qcom | 1 + 2 files changed, 11 insertions(+) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 1bc86e8755d1..9eed60902766 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -754,6 +754,7 @@ + @@ -121401,6 +121402,15 @@ + + + + + + + + + diff --git a/android/abi_gki_aarch64_qcom b/android/abi_gki_aarch64_qcom index f3e65159f870..0004b141d673 100644 --- a/android/abi_gki_aarch64_qcom +++ b/android/abi_gki_aarch64_qcom @@ -3045,6 +3045,7 @@ ww_mutex_lock ww_mutex_unlock __xa_alloc + __xa_alloc_cyclic xa_destroy xa_erase xa_find From 3a6f125d37a949a01c5360fdd1c158a04c3a0b7f Mon Sep 17 00:00:00 2001 From: Bing-Jhong Billy Jheng Date: Thu, 2 Mar 2023 21:00:06 +0800 Subject: [PATCH 1473/1477] UPSTREAM: io_uring: add missing lock in io_get_file_fixed io_get_file_fixed will access io_uring's context. Lock it if it is invoked unlocked (eg via io-wq) to avoid a race condition with fixed files getting unregistered. No single upstream patch exists for this issue, it was fixed as part of the file assignment changes that went into the 5.18 cycle. 
Signed-off-by: Jheng, Bing-Jhong Billy Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 08681391b84da27133deefaaddefd0acfa90c2be) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman Change-Id: I01ec4283589acde1d17318eb76f87ce099ec3fa0 --- io_uring/io_uring.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 2be25b6fe65a..87e1256cea04 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1102,7 +1102,8 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, unsigned nr_args); static void io_clean_op(struct io_kiocb *req); static struct file *io_file_get(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd, bool fixed); + struct io_kiocb *req, int fd, bool fixed, + unsigned int issue_flags); static void __io_queue_sqe(struct io_kiocb *req); static void io_rsrc_put_work(struct work_struct *work); @@ -3985,7 +3986,7 @@ static int io_tee(struct io_kiocb *req, unsigned int issue_flags) return -EAGAIN; in = io_file_get(req->ctx, req, sp->splice_fd_in, - (sp->flags & SPLICE_F_FD_IN_FIXED)); + (sp->flags & SPLICE_F_FD_IN_FIXED), issue_flags); if (!in) { ret = -EBADF; goto done; @@ -4025,7 +4026,7 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags) return -EAGAIN; in = io_file_get(req->ctx, req, sp->splice_fd_in, - (sp->flags & SPLICE_F_FD_IN_FIXED)); + (sp->flags & SPLICE_F_FD_IN_FIXED), issue_flags); if (!in) { ret = -EBADF; goto done; @@ -6886,13 +6887,16 @@ static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file } static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd) + struct io_kiocb *req, int fd, + unsigned int issue_flags) { - struct file *file; + struct file *file = NULL; unsigned long file_ptr; + io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); + if (unlikely((unsigned int)fd >= ctx->nr_user_files)) - 
return NULL; + goto out; fd = array_index_nospec(fd, ctx->nr_user_files); file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; file = (struct file *) (file_ptr & FFS_MASK); @@ -6900,6 +6904,8 @@ static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx, /* mask in overlapping REQ_F and FFS bits */ req->flags |= (file_ptr << REQ_F_NOWAIT_READ_BIT); io_req_set_rsrc_node(req); +out: + io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); return file; } @@ -6917,10 +6923,11 @@ static struct file *io_file_get_normal(struct io_ring_ctx *ctx, } static inline struct file *io_file_get(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd, bool fixed) + struct io_kiocb *req, int fd, bool fixed, + unsigned int issue_flags) { if (fixed) - return io_file_get_fixed(ctx, req, fd); + return io_file_get_fixed(ctx, req, fd, issue_flags); else return io_file_get_normal(ctx, req, fd); } @@ -7142,7 +7149,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[req->opcode].needs_file) { req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd), - (sqe_flags & IOSQE_FIXED_FILE)); + (sqe_flags & IOSQE_FIXED_FILE), 0); if (unlikely(!req->file)) ret = -EBADF; } From 539bde6ffa98bb3faa02c8d545b25f1ea720c104 Mon Sep 17 00:00:00 2001 From: Bing-Jhong Billy Jheng Date: Thu, 2 Mar 2023 21:00:06 +0800 Subject: [PATCH 1474/1477] UPSTREAM: io_uring: add missing lock in io_get_file_fixed io_file_get_fixed() will access io_uring's context. Lock it if it is invoked unlocked (e.g. via io-wq) to avoid a race condition with fixed files getting unregistered. No single upstream patch exists for this issue; it was fixed as part of the file assignment changes that went into the 5.18 cycle.
Signed-off-by: Jheng, Bing-Jhong Billy Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 08681391b84da27133deefaaddefd0acfa90c2be) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman Change-Id: I01ec4283589acde1d17318eb76f87ce099ec3fa0 --- io_uring/io_uring.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 2be25b6fe65a..87e1256cea04 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1102,7 +1102,8 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, unsigned nr_args); static void io_clean_op(struct io_kiocb *req); static struct file *io_file_get(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd, bool fixed); + struct io_kiocb *req, int fd, bool fixed, + unsigned int issue_flags); static void __io_queue_sqe(struct io_kiocb *req); static void io_rsrc_put_work(struct work_struct *work); @@ -3985,7 +3986,7 @@ static int io_tee(struct io_kiocb *req, unsigned int issue_flags) return -EAGAIN; in = io_file_get(req->ctx, req, sp->splice_fd_in, - (sp->flags & SPLICE_F_FD_IN_FIXED)); + (sp->flags & SPLICE_F_FD_IN_FIXED), issue_flags); if (!in) { ret = -EBADF; goto done; @@ -4025,7 +4026,7 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags) return -EAGAIN; in = io_file_get(req->ctx, req, sp->splice_fd_in, - (sp->flags & SPLICE_F_FD_IN_FIXED)); + (sp->flags & SPLICE_F_FD_IN_FIXED), issue_flags); if (!in) { ret = -EBADF; goto done; @@ -6886,13 +6887,16 @@ static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file } static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd) + struct io_kiocb *req, int fd, + unsigned int issue_flags) { - struct file *file; + struct file *file = NULL; unsigned long file_ptr; + io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); + if (unlikely((unsigned int)fd >= ctx->nr_user_files)) - 
return NULL; + goto out; fd = array_index_nospec(fd, ctx->nr_user_files); file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; file = (struct file *) (file_ptr & FFS_MASK); @@ -6900,6 +6904,8 @@ static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx, /* mask in overlapping REQ_F and FFS bits */ req->flags |= (file_ptr << REQ_F_NOWAIT_READ_BIT); io_req_set_rsrc_node(req); +out: + io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); return file; } @@ -6917,10 +6923,11 @@ static struct file *io_file_get_normal(struct io_ring_ctx *ctx, } static inline struct file *io_file_get(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd, bool fixed) + struct io_kiocb *req, int fd, bool fixed, + unsigned int issue_flags) { if (fixed) - return io_file_get_fixed(ctx, req, fd); + return io_file_get_fixed(ctx, req, fd, issue_flags); else return io_file_get_normal(ctx, req, fd); } @@ -7142,7 +7149,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[req->opcode].needs_file) { req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd), - (sqe_flags & IOSQE_FIXED_FILE)); + (sqe_flags & IOSQE_FIXED_FILE), 0); if (unlikely(!req->file)) ret = -EBADF; } From 5210719c5ba25481f38176dbd23d27dce4bca90a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Mar 2023 06:49:57 -0700 Subject: [PATCH 1475/1477] UPSTREAM: io_uring: ensure that io_init_req() passes in the right issue_flags We can't use 0 here, as io_init_req() is always invoked with the ctx uring_lock held. Newer kernels have IO_URING_F_UNLOCKED for this, but previously we used IO_URING_F_NONBLOCK to indicate this as well. 
Fixes: 08681391b84d ("io_uring: add missing lock in io_get_file_fixed") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit da24142b1ef9fd5d36b76e36bab328a5b27523e8) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman Change-Id: I762eacf1b49ca8a38d8b77c44db4ca2bc49b2c4c --- io_uring/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 87e1256cea04..43ccd6ca8302 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -7149,7 +7149,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[req->opcode].needs_file) { req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd), - (sqe_flags & IOSQE_FIXED_FILE), 0); + (sqe_flags & IOSQE_FIXED_FILE), + IO_URING_F_NONBLOCK); if (unlikely(!req->file)) ret = -EBADF; } From e80b42acfac0cb07f6c67922108188d644f876c5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Mar 2023 06:49:57 -0700 Subject: [PATCH 1476/1477] UPSTREAM: io_uring: ensure that io_init_req() passes in the right issue_flags We can't use 0 here, as io_init_req() is always invoked with the ctx uring_lock held. Newer kernels have IO_URING_F_UNLOCKED for this, but previously we used IO_URING_F_NONBLOCK to indicate this as well. 
Fixes: 08681391b84d ("io_uring: add missing lock in io_get_file_fixed") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit da24142b1ef9fd5d36b76e36bab328a5b27523e8) Bug: 268174392 Signed-off-by: Greg Kroah-Hartman Change-Id: I762eacf1b49ca8a38d8b77c44db4ca2bc49b2c4c --- io_uring/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 87e1256cea04..43ccd6ca8302 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -7149,7 +7149,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[req->opcode].needs_file) { req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd), - (sqe_flags & IOSQE_FIXED_FILE), 0); + (sqe_flags & IOSQE_FIXED_FILE), + IO_URING_F_NONBLOCK); if (unlikely(!req->file)) ret = -EBADF; } From 52836ddd51e8be9c0201d005995dce9a756e457a Mon Sep 17 00:00:00 2001 From: Patrick Daly Date: Mon, 10 Oct 2022 19:25:27 -0700 Subject: [PATCH 1477/1477] ANDROID: mm/filemap: Fix missing put_page() for speculative page fault find_get_page() returns a page with increased refcount, assuming a page exists at the given index. Ensure this refcount is dropped on error. 
Bug: 271079833 Bug: 271204963 Fixes: 59d4d125 ("BACKPORT: FROMLIST: mm: implement speculative handling in filemap_fault()") Change-Id: Idc7b9e3f11f32a02bed4c6f4e11cec9200a5c790 Signed-off-by: Patrick Daly (cherry picked from commit 6232eecfa7ca0d8d0ca088da6d0edb2c3a879ff9) Signed-off-by: Zhenhua Huang (cherry picked from commit 1d05213028b6dbdb8801e20f29b6a6f91c216033) Signed-off-by: Srinivasarao Pathipati --- mm/filemap.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index aeb45f36aba1..dd1db5745741 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2736,11 +2736,14 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) if (vmf->flags & FAULT_FLAG_SPECULATIVE) { page = find_get_page(mapping, offset); - if (unlikely(!page) || unlikely(PageReadahead(page))) + if (unlikely(!page)) return VM_FAULT_RETRY; + if (unlikely(PageReadahead(page))) + goto page_put; + if (!trylock_page(page)) - return VM_FAULT_RETRY; + goto page_put; if (unlikely(compound_head(page)->mapping != mapping)) goto page_unlock; @@ -2772,6 +2775,8 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) return VM_FAULT_LOCKED; page_unlock: unlock_page(page); +page_put: + put_page(page); return VM_FAULT_RETRY; }