Files
android_kernel_xiaomi_sm8450/arch/powerpc/platforms/powernv/memtrace.c
David Hildenbrand cd2eda58ea powerpc/powernv/memtrace: Fix crashing the kernel when enabling concurrently
commit d6718941a2767fb383e105d257d2105fe4f15f0e upstream.

It's very easy to crash the kernel right now by simply trying to
enable memtrace concurrently, hammering on the "enable" interface:

loop.sh:
  #!/bin/bash

  dmesg --console-off

  while true; do
          echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
  done

[root@localhost ~]# loop.sh &
[root@localhost ~]# loop.sh &

This quickly results in a kernel crash. Let's properly protect the
"enable" interface using a mutex.

Fixes: 9d5171a8f2 ("powerpc/powernv: Enable removal of memory for in memory tracing")
Cc: stable@vger.kernel.org # v4.14+
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201111145322.15793-3-david@redhat.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-12-30 11:54:16 +01:00

343 lines
7.7 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) IBM Corporation, 2014, 2017
* Anton Blanchard, Rashmica Gupta.
*/
#define pr_fmt(fmt) "memtrace: " fmt
#include <linux/bitops.h>
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/numa.h>
#include <asm/machdep.h>
#include <asm/debugfs.h>
/*
 * This enables us to keep track of the memory removed from each node.
 * One entry describes one removed range together with its debugfs files.
 */
struct memtrace_entry {
/* Mapping of the removed range as returned by ioremap(); NULL while unmapped */
void *mem;
/* Physical start address of the removed range */
u64 start;
/* Size of the removed range in bytes */
u64 size;
/* Node the range was removed from; NUMA_NO_NODE once it has been added back */
u32 nid;
/* Per-node debugfs directory holding the "trace", "start" and "size" files */
struct dentry *dir;
/* Name of the debugfs directory: the node id formatted with "%08x" */
char name[16];
};
/* Held by memtrace_enable_set() around every change to the state below */
static DEFINE_MUTEX(memtrace_mutex);
/* Per-node trace size in bytes of the current setup; 0 when nothing is enabled */
static u64 memtrace_size;
/* Entries for the removed ranges; allocated with num_online_nodes() slots */
static struct memtrace_entry *memtrace_array;
/* Number of slots of memtrace_array that have been filled in */
static unsigned int memtrace_array_nr;
/*
 * Read handler of the per-node "trace" file.
 *
 * Copies up to @count bytes of the entry's mapped buffer, starting at *@ppos,
 * to @ubuf and returns whatever simple_read_from_buffer() returns.
 */
static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
			     size_t count, loff_t *ppos)
{
	/* The entry is expected in private_data (the file uses simple_open()) */
	struct memtrace_entry *entry = filp->private_data;
	void *trace_buf = entry->mem;
	u64 trace_len = entry->size;

	return simple_read_from_buffer(ubuf, count, ppos, trace_buf, trace_len);
}
/* File operations used for the per-node "trace" debugfs file */
static const struct file_operations memtrace_fops = {
	.open	= simple_open,
	.read	= memtrace_read,
	.llseek	= default_llseek,
};
/*
 * walk_memory_blocks() callback: returns 0 if @mem is online and -1
 * otherwise. @arg is not used.
 */
static int check_memblock_online(struct memory_block *mem, void *arg)
{
	return (mem->state == MEM_ONLINE) ? 0 : -1;
}
/*
 * walk_memory_blocks() callback: stores the state carried in @arg (an
 * integer value passed through the opaque pointer) in @mem. Always returns 0.
 */
static int change_memblock_state(struct memory_block *mem, void *arg)
{
	mem->state = (unsigned long)arg;

	return 0;
}
/*
 * Zero @nr_pages pages starting at @start_pfn through the linear mapping.
 */
static void memtrace_clear_range(unsigned long start_pfn,
				 unsigned long nr_pages)
{
	const unsigned long stop_pfn = start_pfn + nr_pages;
	unsigned long cur = start_pfn;

	/*
	 * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM
	 * does not apply, avoid passing around "struct page" and use
	 * clear_page() instead directly.
	 */
	while (cur < stop_pfn) {
		/* Offer to reschedule once per section boundary */
		if (IS_ALIGNED(cur, PAGES_PER_SECTION))
			cond_resched();
		clear_page(__va(PFN_PHYS(cur)));
		cur++;
	}
}
/*
 * Try to offline @nr_pages pages starting at @start_pfn.
 *
 * Every memory block in the range must currently be online. The blocks are
 * marked MEM_GOING_OFFLINE around the offline_pages() call and end up as
 * MEM_OFFLINE on success or back at MEM_ONLINE on failure. @nid is unused.
 *
 * Returns true if the range was offlined, false otherwise.
 *
 * called with device_hotplug_lock held
 */
static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
{
	const unsigned long base = PFN_PHYS(start_pfn);
	const unsigned long len = PFN_PHYS(nr_pages);
	unsigned long end_state = MEM_OFFLINE;
	bool offlined = true;

	if (walk_memory_blocks(base, len, NULL, check_memblock_online))
		return false;

	walk_memory_blocks(base, len, (void *)MEM_GOING_OFFLINE,
			   change_memblock_state);

	if (offline_pages(start_pfn, nr_pages)) {
		/* Offlining failed: the blocks go back to online */
		end_state = MEM_ONLINE;
		offlined = false;
	}

	walk_memory_blocks(base, len, (void *)end_state,
			   change_memblock_state);

	return offlined;
}
/*
 * Offline and remove @size bytes of memory from node @nid.
 *
 * Candidate ranges are tried from the top of the node downwards in steps of
 * @size. The first candidate that can be offlined is cleared and removed from
 * the kernel in memory-block-sized chunks.
 *
 * Returns the physical start address of the removed range, or 0 if the node
 * spans no pages, @size does not fit into the node's PFN span, or no
 * candidate could be offlined.
 */
static u64 memtrace_alloc_node(u32 nid, u64 size)
{
	u64 start_pfn, end_pfn, nr_pages, pfn;
	u64 base_pfn;
	u64 bytes = memory_block_size_bytes();

	if (!node_spanned_pages(nid))
		return 0;

	start_pfn = node_start_pfn(nid);
	end_pfn = node_end_pfn(nid);
	nr_pages = size >> PAGE_SHIFT;

	/*
	 * A request that does not fit into the node can never succeed. Bail
	 * out early: for requests larger than the node's end PFN the
	 * subtraction below would wrap around and the search loop would walk
	 * bogus PFNs.
	 */
	if (!nr_pages || nr_pages > end_pfn - start_pfn)
		return 0;

	/*
	 * Trace memory needs to be aligned to the size.
	 *
	 * NOTE(review): round_down() only yields a multiple of nr_pages when
	 * nr_pages is a power of two; confirm whether block-aligned sizes that
	 * are not a power of two should be rejected by the caller.
	 */
	end_pfn = round_down(end_pfn - nr_pages, nr_pages);

	lock_device_hotplug();
	for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
		if (memtrace_offline_pages(nid, base_pfn, nr_pages)) {
			/*
			 * Clear the range while we still have a linear
			 * mapping.
			 */
			memtrace_clear_range(base_pfn, nr_pages);
			/*
			 * Remove memory in memory block size chunks so that
			 * iomem resources are always split to the same size and
			 * we never try to remove memory that spans two iomem
			 * resources.
			 */
			end_pfn = base_pfn + nr_pages;
			for (pfn = base_pfn; pfn < end_pfn; pfn += bytes >> PAGE_SHIFT)
				__remove_memory(nid, pfn << PAGE_SHIFT, bytes);
			unlock_device_hotplug();
			return base_pfn << PAGE_SHIFT;
		}

		/*
		 * Stop before "base_pfn -= nr_pages" can wrap below zero, which
		 * would otherwise restart the search at a huge bogus PFN.
		 */
		if (base_pfn < nr_pages)
			break;
	}
	unlock_device_hotplug();

	return 0;
}
/*
 * Remove @size bytes of memory from every online node and record each
 * removed range in a freshly allocated memtrace_array.
 *
 * Nodes from which no memory could be removed are reported and skipped, so
 * memtrace_array_nr may end up smaller than the number of online nodes.
 *
 * Returns 0 on success (even if some or all nodes were skipped) or -ENOMEM
 * if the array could not be allocated.
 */
static int memtrace_init_regions_runtime(u64 size)
{
	struct memtrace_entry *ent;
	u32 nid;
	u64 m;

	/*
	 * The entries below are appended at index memtrace_array_nr. Start
	 * from zero so they always land inside the array allocated here, which
	 * only has room for one entry per online node.
	 */
	memtrace_array_nr = 0;

	memtrace_array = kcalloc(num_online_nodes(), sizeof(*memtrace_array),
				 GFP_KERNEL);
	if (!memtrace_array) {
		pr_err("Failed to allocate memtrace_array\n");
		return -ENOMEM;
	}

	for_each_online_node(nid) {
		m = memtrace_alloc_node(nid, size);

		/*
		 * A node might not have any local memory, so warn but
		 * continue on.
		 */
		if (!m) {
			pr_err("Failed to allocate trace memory on node %d\n", nid);
			continue;
		}

		pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);

		ent = &memtrace_array[memtrace_array_nr];
		ent->start = m;
		ent->size = size;
		ent->nid = nid;
		memtrace_array_nr++;
	}

	return 0;
}
static struct dentry *memtrace_debugfs_dir;
/*
 * Map every recorded trace range and publish it in debugfs.
 *
 * For each entry of memtrace_array a directory named after the node id is
 * created below memtrace_debugfs_dir, containing the files "trace", "start"
 * and "size". Entries that cannot be mapped are reported and skipped.
 *
 * Returns 0 if all entries were mapped, -1 if at least one mapping failed.
 */
static int memtrace_init_debugfs(void)
{
	struct memtrace_entry *ent;
	unsigned int idx;
	int status = 0;

	for (idx = 0; idx < memtrace_array_nr; idx++) {
		ent = &memtrace_array[idx];

		ent->mem = ioremap(ent->start, ent->size);
		if (ent->mem) {
			snprintf(ent->name, sizeof(ent->name), "%08x", ent->nid);
			ent->dir = debugfs_create_dir(ent->name,
						      memtrace_debugfs_dir);

			debugfs_create_file("trace", 0400, ent->dir, ent,
					    &memtrace_fops);
			debugfs_create_x64("start", 0400, ent->dir, &ent->start);
			debugfs_create_x64("size", 0400, ent->dir, &ent->size);
			continue;
		}

		/* Warn but continue on */
		pr_err("Failed to map trace memory at 0x%llx\n",
		       ent->start);
		status = -1;
	}

	return status;
}
/*
 * walk_memory_blocks() callback: onlines the device of @mem and returns the
 * result of device_online(). @arg is not used.
 */
static int online_mem_block(struct memory_block *mem, void *arg)
{
	struct device *blk_dev = &mem->dev;

	return device_online(blk_dev);
}
/*
 * Iterate through the chunks of memory we have removed from the kernel
 * and attempt to add them back to the kernel.
 *
 * Entries are processed from last to first. For each entry that has not been
 * added back yet, its debugfs directory is removed, the mapping is torn down
 * and the range is handed to add_memory(); on success its memory blocks are
 * onlined and the entry is marked with NUMA_NO_NODE.
 *
 * Returns 0 if every chunk has been added back (the array and the global
 * state are then reset), otherwise the number of chunks that failed. Failed
 * chunks stay in memtrace_array so that a later call can retry them.
 */
static int memtrace_online(void)
{
	int i, ret = 0;
	struct memtrace_entry *ent;

	for (i = memtrace_array_nr - 1; i >= 0; i--) {
		ent = &memtrace_array[i];

		/* We have onlined this chunk previously */
		if (ent->nid == NUMA_NO_NODE)
			continue;

		/*
		 * Take the debugfs files away before the mapping they read
		 * from disappears, so "trace" can never be read through a
		 * stale or NULL ent->mem, including when add_memory() fails
		 * below. Forget the dentry so a retry does not remove it
		 * twice.
		 */
		debugfs_remove_recursive(ent->dir);
		ent->dir = NULL;

		/* Remove from io mappings */
		if (ent->mem) {
			iounmap(ent->mem);
			ent->mem = NULL;
		}

		if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) {
			pr_err("Failed to add trace memory to node %d\n",
			       ent->nid);
			ret += 1;
			continue;
		}

		lock_device_hotplug();
		walk_memory_blocks(ent->start, ent->size, NULL,
				   online_mem_block);
		unlock_device_hotplug();

		/*
		 * Memory was added successfully so clean up references to it
		 * so on reentry we can tell that this chunk was added.
		 */
		pr_info("Added trace memory back to node %d\n", ent->nid);
		ent->size = ent->start = ent->nid = NUMA_NO_NODE;
	}

	if (ret)
		return ret;

	/* If all chunks of memory were added successfully, reset globals */
	kfree(memtrace_array);
	memtrace_array = NULL;
	memtrace_size = 0;
	memtrace_array_nr = 0;

	return 0;
}
/*
 * Write handler of the debugfs "enable" file.
 *
 * @val is the amount of memory in bytes to remove from every online node; it
 * must be a multiple of the memory block size. Memory removed by an earlier
 * write is added back first. Writing 0 only adds memory back.
 *
 * Returns 0 on success, -EINVAL if @val is not aligned to the memory block
 * size, or -EAGAIN if adding back old memory or setting up the new regions
 * failed. @data is unused.
 */
static int memtrace_enable_set(void *data, u64 val)
{
	int rc = -EAGAIN;
	u64 bytes;

	/*
	 * Don't attempt to do anything if size isn't aligned to a memory
	 * block. Zero passes this check and means "disable".
	 */
	bytes = memory_block_size_bytes();
	if (val & (bytes - 1)) {
		pr_err("Value must be aligned with 0x%llx\n", bytes);
		return -EINVAL;
	}

	mutex_lock(&memtrace_mutex);

	/*
	 * Re-add/online previously removed/offlined memory.
	 *
	 * memtrace_array is checked as well: if an earlier attempt removed
	 * memory but failed while setting up debugfs, memtrace_size is still
	 * zero although regions are recorded. Those must be given back before
	 * the array is replaced, or both the array and the removed memory
	 * would be lost.
	 */
	if (memtrace_size || memtrace_array) {
		if (memtrace_online())
			goto out_unlock;
	}

	if (!val) {
		rc = 0;
		goto out_unlock;
	}

	/* Offline and remove memory */
	if (memtrace_init_regions_runtime(val))
		goto out_unlock;

	if (memtrace_init_debugfs())
		goto out_unlock;

	memtrace_size = val;
	rc = 0;
out_unlock:
	mutex_unlock(&memtrace_mutex);
	return rc;
}
/*
 * Read handler of the debugfs "enable" file: reports the current value of
 * memtrace_size through @val. Always returns 0. @data is unused.
 */
static int memtrace_enable_get(void *data, u64 *val)
{
	const u64 enabled_size = memtrace_size;

	*val = enabled_size;

	return 0;
}
/*
 * File operations for the debugfs "enable" file, built from
 * memtrace_enable_get() and memtrace_enable_set(); the value is formatted
 * with "0x%016llx\n".
 */
DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
memtrace_enable_set, "0x%016llx\n");
/*
 * Create the "memtrace" debugfs directory below powerpc_debugfs_root and the
 * "enable" control file inside it. Always returns 0.
 */
static int memtrace_init(void)
{
	struct dentry *root;

	root = debugfs_create_dir("memtrace", powerpc_debugfs_root);
	memtrace_debugfs_dir = root;

	debugfs_create_file("enable", 0600, root, NULL, &memtrace_init_fops);

	return 0;
}
/* Register memtrace_init() via machine_device_initcall() for the powernv machine */
machine_device_initcall(powernv, memtrace_init);