Files
android_kernel_xiaomi_sm8450/arch/arm64/mm/ptdump.c
Russell King 75d0665639 BACKPORT: arm64/bpf: Remove 128MB limit for BPF JIT programs
(Backport: resolve conflicts due to missing f4693c2716b35 and also
 drop in_bpf_jit from fixup_exception the same way 5.15 backport
 9c82ce593626 does it.)

Commit 91fc957c9b ("arm64/bpf: don't allocate BPF JIT programs in module
memory") restricts BPF JIT program allocation to a 128MB region to ensure
BPF programs are still in branching range of each other. However this
restriction should not apply to the aarch64 JIT, since BPF_JMP | BPF_CALL
are implemented as a 64-bit move into a register and then a BLR instruction -
which has the effect of being able to call anything without proximity
limitation.

The practical reason to relax this restriction on JIT memory is that 128MB of
JIT memory can be quickly exhausted, especially where PAGE_SIZE is 64KB - one
page is needed per program. In cases where seccomp filters are applied to
multiple VMs on VM launch - such filters are classic BPF but converted to
BPF - this can severely limit the number of VMs that can be launched. In a
world where we support BPF JIT always on, turning off the JIT isn't always an
option either.

Fixes: 91fc957c9b ("arm64/bpf: don't allocate BPF JIT programs in module memory")
Suggested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <russell.king@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Alan Maguire <alan.maguire@oracle.com>
Link: https://lore.kernel.org/bpf/1636131046-5982-2-git-send-email-alan.maguire@oracle.com
(cherry picked from commit b89ddf4cca43f1269093942cf5c4e457fd45c335)
Bug: 252919296
Change-Id: Iec7d0b2bba001df94c2e21fcd5883ff002111cd5
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
2023-01-23 17:15:17 +00:00

392 lines
8.6 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2014, The Linux Foundation. All rights reserved.
* Debug helper to dump the current kernel pagetables of the system
* so that we can see what the various memory ranges are set to.
*
* Derived from x86 and arm implementation:
* (C) Copyright 2008 Intel Corporation
*
* Author: Arjan van de Ven <arjan@linux.intel.com>
*/
#include <linux/debugfs.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/ptdump.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/fixmap.h>
#include <asm/kasan.h>
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptdump.h>
enum address_markers_idx {
PAGE_OFFSET_NR = 0,
PAGE_END_NR,
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
KASAN_START_NR,
#endif
};
static struct addr_marker address_markers[] = {
{ PAGE_OFFSET, "Linear Mapping start" },
{ 0 /* PAGE_END */, "Linear Mapping end" },
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
{ 0 /* KASAN_SHADOW_START */, "Kasan shadow start" },
{ KASAN_SHADOW_END, "Kasan shadow end" },
#endif
{ MODULES_VADDR, "Modules start" },
{ MODULES_END, "Modules end" },
{ VMALLOC_START, "vmalloc() area" },
{ VMALLOC_END, "vmalloc() end" },
{ FIXADDR_START, "Fixmap start" },
{ FIXADDR_TOP, "Fixmap end" },
{ PCI_IO_START, "PCI I/O start" },
{ PCI_IO_END, "PCI I/O end" },
#ifdef CONFIG_SPARSEMEM_VMEMMAP
{ VMEMMAP_START, "vmemmap start" },
{ VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" },
#endif
{ -1, NULL },
};
#define pt_dump_seq_printf(m, fmt, args...) \
({ \
if (m) \
seq_printf(m, fmt, ##args); \
})
#define pt_dump_seq_puts(m, fmt) \
({ \
if (m) \
seq_printf(m, fmt); \
})
/*
* The page dumper groups page table entries of the same type into a single
* description. It uses pg_state to track the range information while
* iterating over the pte entries. When the continuity is broken it then
* dumps out a description of the range.
*/
struct pg_state {
struct ptdump_state ptdump;
struct seq_file *seq;
const struct addr_marker *marker;
unsigned long start_address;
int level;
u64 current_prot;
bool check_wx;
unsigned long wx_pages;
unsigned long uxn_pages;
};
struct prot_bits {
u64 mask;
u64 val;
const char *set;
const char *clear;
};
static const struct prot_bits pte_bits[] = {
{
.mask = PTE_VALID,
.val = PTE_VALID,
.set = " ",
.clear = "F",
}, {
.mask = PTE_USER,
.val = PTE_USER,
.set = "USR",
.clear = " ",
}, {
.mask = PTE_RDONLY,
.val = PTE_RDONLY,
.set = "ro",
.clear = "RW",
}, {
.mask = PTE_PXN,
.val = PTE_PXN,
.set = "NX",
.clear = "x ",
}, {
.mask = PTE_SHARED,
.val = PTE_SHARED,
.set = "SHD",
.clear = " ",
}, {
.mask = PTE_AF,
.val = PTE_AF,
.set = "AF",
.clear = " ",
}, {
.mask = PTE_NG,
.val = PTE_NG,
.set = "NG",
.clear = " ",
}, {
.mask = PTE_CONT,
.val = PTE_CONT,
.set = "CON",
.clear = " ",
}, {
.mask = PTE_TABLE_BIT,
.val = PTE_TABLE_BIT,
.set = " ",
.clear = "BLK",
}, {
.mask = PTE_UXN,
.val = PTE_UXN,
.set = "UXN",
.clear = " ",
}, {
.mask = PTE_GP,
.val = PTE_GP,
.set = "GP",
.clear = " ",
}, {
.mask = PTE_ATTRINDX_MASK,
.val = PTE_ATTRINDX(MT_DEVICE_nGnRnE),
.set = "DEVICE/nGnRnE",
}, {
.mask = PTE_ATTRINDX_MASK,
.val = PTE_ATTRINDX(MT_DEVICE_nGnRE),
.set = "DEVICE/nGnRE",
}, {
.mask = PTE_ATTRINDX_MASK,
.val = PTE_ATTRINDX(MT_DEVICE_GRE),
.set = "DEVICE/GRE",
}, {
.mask = PTE_ATTRINDX_MASK,
.val = PTE_ATTRINDX(MT_NORMAL_NC),
.set = "MEM/NORMAL-NC",
}, {
.mask = PTE_ATTRINDX_MASK,
.val = PTE_ATTRINDX(MT_NORMAL),
.set = "MEM/NORMAL",
}, {
.mask = PTE_ATTRINDX_MASK,
.val = PTE_ATTRINDX(MT_NORMAL_TAGGED),
.set = "MEM/NORMAL-TAGGED",
}
};
struct pg_level {
const struct prot_bits *bits;
const char *name;
size_t num;
u64 mask;
};
static struct pg_level pg_level[] = {
{ /* pgd */
.name = "PGD",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
}, { /* p4d */
.name = "P4D",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
}, { /* pud */
.name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
}, { /* pmd */
.name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
}, { /* pte */
.name = "PTE",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
},
};
static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
size_t num)
{
unsigned i;
for (i = 0; i < num; i++, bits++) {
const char *s;
if ((st->current_prot & bits->mask) == bits->val)
s = bits->set;
else
s = bits->clear;
if (s)
pt_dump_seq_printf(st->seq, " %s", s);
}
}
static void note_prot_uxn(struct pg_state *st, unsigned long addr)
{
if (!st->check_wx)
return;
if ((st->current_prot & PTE_UXN) == PTE_UXN)
return;
WARN_ONCE(1, "arm64/mm: Found non-UXN mapping at address %p/%pS\n",
(void *)st->start_address, (void *)st->start_address);
st->uxn_pages += (addr - st->start_address) / PAGE_SIZE;
}
static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
if (!st->check_wx)
return;
if ((st->current_prot & PTE_RDONLY) == PTE_RDONLY)
return;
if ((st->current_prot & PTE_PXN) == PTE_PXN)
return;
WARN_ONCE(1, "arm64/mm: Found insecure W+X mapping at address %p/%pS\n",
(void *)st->start_address, (void *)st->start_address);
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
static const char units[] = "KMGTPE";
u64 prot = 0;
if (level >= 0)
prot = val & pg_level[level].mask;
if (st->level == -1) {
st->level = level;
st->current_prot = prot;
st->start_address = addr;
pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
} else if (prot != st->current_prot || level != st->level ||
addr >= st->marker[1].start_address) {
const char *unit = units;
unsigned long delta;
if (st->current_prot) {
note_prot_uxn(st, addr);
note_prot_wx(st, addr);
}
pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ",
st->start_address, addr);
delta = (addr - st->start_address) >> 10;
while (!(delta & 1023) && unit[1]) {
delta >>= 10;
unit++;
}
pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
pg_level[st->level].name);
if (st->current_prot && pg_level[st->level].bits)
dump_prot(st, pg_level[st->level].bits,
pg_level[st->level].num);
pt_dump_seq_puts(st->seq, "\n");
if (addr >= st->marker[1].start_address) {
st->marker++;
pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
}
st->start_address = addr;
st->current_prot = prot;
st->level = level;
}
if (addr >= st->marker[1].start_address) {
st->marker++;
pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
}
}
void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
{
unsigned long end = ~0UL;
struct pg_state st;
if (info->base_addr < TASK_SIZE_64)
end = TASK_SIZE_64;
st = (struct pg_state){
.seq = s,
.marker = info->markers,
.ptdump = {
.note_page = note_page,
.range = (struct ptdump_range[]){
{info->base_addr, end},
{0, 0}
}
}
};
ptdump_walk_pgd(&st.ptdump, info->mm, NULL);
}
static void ptdump_initialize(void)
{
unsigned i, j;
for (i = 0; i < ARRAY_SIZE(pg_level); i++)
if (pg_level[i].bits)
for (j = 0; j < pg_level[i].num; j++)
pg_level[i].mask |= pg_level[i].bits[j].mask;
}
static struct ptdump_info kernel_ptdump_info = {
.mm = &init_mm,
.markers = address_markers,
.base_addr = PAGE_OFFSET,
};
void ptdump_check_wx(void)
{
struct pg_state st = {
.seq = NULL,
.marker = (struct addr_marker[]) {
{ 0, NULL},
{ -1, NULL},
},
.level = -1,
.check_wx = true,
.ptdump = {
.note_page = note_page,
.range = (struct ptdump_range[]) {
{PAGE_OFFSET, ~0UL},
{0, 0}
}
}
};
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
if (st.wx_pages || st.uxn_pages)
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
st.wx_pages, st.uxn_pages);
else
pr_info("Checked W+X mappings: passed, no W+X pages found\n");
}
static int ptdump_init(void)
{
address_markers[PAGE_END_NR].start_address = PAGE_END;
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START;
#endif
ptdump_initialize();
ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
return 0;
}
device_initcall(ptdump_init);