Merge branch 'linus' into sched/urgent, to resolve conflicts
Conflicts:
	arch/arm64/kernel/entry.S
	arch/x86/Kconfig
	include/linux/sched/mm.h
	kernel/fork.c

Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -67,7 +67,7 @@ void __init MMU_init_hw(void)
	/* PIN up to the 3 first 8Mb after IMMR in DTLB table */
#ifdef CONFIG_PIN_TLB_DATA
	unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
	unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY;
	unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY;
#ifdef CONFIG_PIN_TLB_IMMR
	int i = 29;
#else
@@ -79,7 +79,7 @@ void __init MMU_init_hw(void)
	for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
		mtspr(SPRN_MD_CTR, ctr | (i << 8));
		mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
		mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
		mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2);
		mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
		addr += LARGE_PAGE_SIZE_8M;
		mem -= LARGE_PAGE_SIZE_8M;
@@ -9,7 +9,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)

obj-y				:= fault.o mem.o pgtable.o mmap.o \
				   init_$(BITS).o pgtable_$(BITS).o \
				   init-common.o mmu_context.o
				   init-common.o mmu_context.o drmem.o
obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
				   tlb_nohash_low.o
obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(BITS)e.o
@@ -44,3 +44,4 @@ obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
obj-$(CONFIG_SPAPR_TCE_IOMMU)	+= mmu_context_iommu.o
obj-$(CONFIG_PPC_PTDUMP)	+= dump_linuxpagetables.o
obj-$(CONFIG_PPC_HTDUMP)	+= dump_hashpagetable.o
obj-$(CONFIG_PPC_MEM_KEYS)	+= pkeys.o
arch/powerpc/mm/drmem.c (new file, 439 lines)
@@ -0,0 +1,439 @@
/*
 * Dynamic reconfiguration memory support
 *
 * Copyright 2017 IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "drmem: " fmt

#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
#include <asm/prom.h>
#include <asm/drmem.h>

static struct drmem_lmb_info __drmem_info;
struct drmem_lmb_info *drmem_info = &__drmem_info;

u64 drmem_lmb_memory_max(void)
{
	struct drmem_lmb *last_lmb;

	last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
	return last_lmb->base_addr + drmem_lmb_size();
}

static u32 drmem_lmb_flags(struct drmem_lmb *lmb)
{
	/*
	 * Return the value of the lmb flags field minus the reserved
	 * bit used internally for hotplug processing.
	 */
	return lmb->flags & ~DRMEM_LMB_RESERVED;
}

static struct property *clone_property(struct property *prop, u32 prop_sz)
{
	struct property *new_prop;

	new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
	if (!new_prop)
		return NULL;

	new_prop->name = kstrdup(prop->name, GFP_KERNEL);
	new_prop->value = kzalloc(prop_sz, GFP_KERNEL);
	if (!new_prop->name || !new_prop->value) {
		kfree(new_prop->name);
		kfree(new_prop->value);
		kfree(new_prop);
		return NULL;
	}

	new_prop->length = prop_sz;
#if defined(CONFIG_OF_DYNAMIC)
	of_property_set_flag(new_prop, OF_DYNAMIC);
#endif
	return new_prop;
}

static int drmem_update_dt_v1(struct device_node *memory,
			      struct property *prop)
{
	struct property *new_prop;
	struct of_drconf_cell_v1 *dr_cell;
	struct drmem_lmb *lmb;
	u32 *p;

	new_prop = clone_property(prop, prop->length);
	if (!new_prop)
		return -1;

	p = new_prop->value;
	*p++ = cpu_to_be32(drmem_info->n_lmbs);

	dr_cell = (struct of_drconf_cell_v1 *)p;

	for_each_drmem_lmb(lmb) {
		dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
		dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
		dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
		dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));

		dr_cell++;
	}

	of_update_property(memory, new_prop);
	return 0;
}

static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
				struct drmem_lmb *lmb)
{
	dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
	dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
	dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
	dr_cell->flags = cpu_to_be32(lmb->flags);
}

static int drmem_update_dt_v2(struct device_node *memory,
			      struct property *prop)
{
	struct property *new_prop;
	struct of_drconf_cell_v2 *dr_cell;
	struct drmem_lmb *lmb, *prev_lmb;
	u32 lmb_sets, prop_sz, seq_lmbs;
	u32 *p;

	/* First pass, determine how many LMB sets are needed. */
	lmb_sets = 0;
	prev_lmb = NULL;
	for_each_drmem_lmb(lmb) {
		if (!prev_lmb) {
			prev_lmb = lmb;
			lmb_sets++;
			continue;
		}

		if (prev_lmb->aa_index != lmb->aa_index ||
		    prev_lmb->flags != lmb->flags)
			lmb_sets++;

		prev_lmb = lmb;
	}

	prop_sz = lmb_sets * sizeof(*dr_cell) + sizeof(__be32);
	new_prop = clone_property(prop, prop_sz);
	if (!new_prop)
		return -1;

	p = new_prop->value;
	*p++ = cpu_to_be32(lmb_sets);

	dr_cell = (struct of_drconf_cell_v2 *)p;

	/* Second pass, populate the LMB set data */
	prev_lmb = NULL;
	seq_lmbs = 0;
	for_each_drmem_lmb(lmb) {
		if (prev_lmb == NULL) {
			/* Start of first LMB set */
			prev_lmb = lmb;
			init_drconf_v2_cell(dr_cell, lmb);
			seq_lmbs++;
			continue;
		}

		if (prev_lmb->aa_index != lmb->aa_index ||
		    prev_lmb->flags != lmb->flags) {
			/* end of one set, start of another */
			dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
			dr_cell++;

			init_drconf_v2_cell(dr_cell, lmb);
			seq_lmbs = 1;
		} else {
			seq_lmbs++;
		}

		prev_lmb = lmb;
	}

	/* close out last LMB set */
	dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
	of_update_property(memory, new_prop);
	return 0;
}

int drmem_update_dt(void)
{
	struct device_node *memory;
	struct property *prop;
	int rc = -1;

	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (!memory)
		return -1;

	prop = of_find_property(memory, "ibm,dynamic-memory", NULL);
	if (prop) {
		rc = drmem_update_dt_v1(memory, prop);
	} else {
		prop = of_find_property(memory, "ibm,dynamic-memory-v2", NULL);
		if (prop)
			rc = drmem_update_dt_v2(memory, prop);
	}

	of_node_put(memory);
	return rc;
}

static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
				       const __be32 **prop)
{
	const __be32 *p = *prop;

	lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
	lmb->drc_index = of_read_number(p++, 1);

	p++; /* skip reserved field */

	lmb->aa_index = of_read_number(p++, 1);
	lmb->flags = of_read_number(p++, 1);

	*prop = p;
}

static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
			void (*func)(struct drmem_lmb *, const __be32 **))
{
	struct drmem_lmb lmb;
	u32 i, n_lmbs;

	n_lmbs = of_read_number(prop++, 1);

	for (i = 0; i < n_lmbs; i++) {
		read_drconf_v1_cell(&lmb, &prop);
		func(&lmb, &usm);
	}
}

static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
				       const __be32 **prop)
{
	const __be32 *p = *prop;

	dr_cell->seq_lmbs = of_read_number(p++, 1);
	dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
	dr_cell->drc_index = of_read_number(p++, 1);
	dr_cell->aa_index = of_read_number(p++, 1);
	dr_cell->flags = of_read_number(p++, 1);

	*prop = p;
}

static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
			void (*func)(struct drmem_lmb *, const __be32 **))
{
	struct of_drconf_cell_v2 dr_cell;
	struct drmem_lmb lmb;
	u32 i, j, lmb_sets;

	lmb_sets = of_read_number(prop++, 1);

	for (i = 0; i < lmb_sets; i++) {
		read_drconf_v2_cell(&dr_cell, &prop);

		for (j = 0; j < dr_cell.seq_lmbs; j++) {
			lmb.base_addr = dr_cell.base_addr;
			dr_cell.base_addr += drmem_lmb_size();

			lmb.drc_index = dr_cell.drc_index;
			dr_cell.drc_index++;

			lmb.aa_index = dr_cell.aa_index;
			lmb.flags = dr_cell.flags;

			func(&lmb, &usm);
		}
	}
}

#ifdef CONFIG_PPC_PSERIES
void __init walk_drmem_lmbs_early(unsigned long node,
			void (*func)(struct drmem_lmb *, const __be32 **))
{
	const __be32 *prop, *usm;
	int len;

	prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
	if (!prop || len < dt_root_size_cells * sizeof(__be32))
		return;

	drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);

	usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);

	prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len);
	if (prop) {
		__walk_drmem_v1_lmbs(prop, usm, func);
	} else {
		prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2",
					   &len);
		if (prop)
			__walk_drmem_v2_lmbs(prop, usm, func);
	}

	memblock_dump_all();
}

#endif

static int __init init_drmem_lmb_size(struct device_node *dn)
{
	const __be32 *prop;
	int len;

	if (drmem_info->lmb_size)
		return 0;

	prop = of_get_property(dn, "ibm,lmb-size", &len);
	if (!prop || len < dt_root_size_cells * sizeof(__be32)) {
		pr_info("Could not determine LMB size\n");
		return -1;
	}

	drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
	return 0;
}

/*
 * Returns the property linux,drconf-usable-memory if
 * it exists (the property exists only in kexec/kdump kernels,
 * added by kexec-tools)
 */
static const __be32 *of_get_usable_memory(struct device_node *dn)
{
	const __be32 *prop;
	u32 len;

	prop = of_get_property(dn, "linux,drconf-usable-memory", &len);
	if (!prop || len < sizeof(unsigned int))
		return NULL;

	return prop;
}

void __init walk_drmem_lmbs(struct device_node *dn,
			void (*func)(struct drmem_lmb *, const __be32 **))
{
	const __be32 *prop, *usm;

	if (init_drmem_lmb_size(dn))
		return;

	usm = of_get_usable_memory(dn);

	prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
	if (prop) {
		__walk_drmem_v1_lmbs(prop, usm, func);
	} else {
		prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
		if (prop)
			__walk_drmem_v2_lmbs(prop, usm, func);
	}
}

static void __init init_drmem_v1_lmbs(const __be32 *prop)
{
	struct drmem_lmb *lmb;

	drmem_info->n_lmbs = of_read_number(prop++, 1);

	drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
				   GFP_KERNEL);
	if (!drmem_info->lmbs)
		return;

	for_each_drmem_lmb(lmb)
		read_drconf_v1_cell(lmb, &prop);
}

static void __init init_drmem_v2_lmbs(const __be32 *prop)
{
	struct drmem_lmb *lmb;
	struct of_drconf_cell_v2 dr_cell;
	const __be32 *p;
	u32 i, j, lmb_sets;
	int lmb_index;

	lmb_sets = of_read_number(prop++, 1);

	/* first pass, calculate the number of LMBs */
	p = prop;
	for (i = 0; i < lmb_sets; i++) {
		read_drconf_v2_cell(&dr_cell, &p);
		drmem_info->n_lmbs += dr_cell.seq_lmbs;
	}

	drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
				   GFP_KERNEL);
	if (!drmem_info->lmbs)
		return;

	/* second pass, read in the LMB information */
	lmb_index = 0;
	p = prop;

	for (i = 0; i < lmb_sets; i++) {
		read_drconf_v2_cell(&dr_cell, &p);

		for (j = 0; j < dr_cell.seq_lmbs; j++) {
			lmb = &drmem_info->lmbs[lmb_index++];

			lmb->base_addr = dr_cell.base_addr;
			dr_cell.base_addr += drmem_info->lmb_size;

			lmb->drc_index = dr_cell.drc_index;
			dr_cell.drc_index++;

			lmb->aa_index = dr_cell.aa_index;
			lmb->flags = dr_cell.flags;
		}
	}
}

static int __init drmem_init(void)
{
	struct device_node *dn;
	const __be32 *prop;

	dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (!dn) {
		pr_info("No dynamic reconfiguration memory found\n");
		return 0;
	}

	if (init_drmem_lmb_size(dn)) {
		of_node_put(dn);
		return 0;
	}

	prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
	if (prop) {
		init_drmem_v1_lmbs(prop);
	} else {
		prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
		if (prop)
			init_drmem_v2_lmbs(prop);
	}

	of_node_put(dn);
	return 0;
}
late_initcall(drmem_init);
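The new file above gives platform code a single iterator over dynamic-reconfiguration memory. A caller is expected to hand walk_drmem_lmbs() the /ibm,dynamic-reconfiguration-memory node plus a callback that is invoked once per LMB, which is how the numa.c conversion later in this commit uses it. A minimal sketch (the callback, counter and wrapper names below are hypothetical and not part of the commit):

#include <asm/drmem.h>

/* Hypothetical example: count the LMBs currently assigned to this partition. */
static unsigned long assigned_lmbs;

static void __init count_assigned_lmb(struct drmem_lmb *lmb,
				      const __be32 **usm)
{
	if (lmb->flags & DRCONF_MEM_ASSIGNED)
		assigned_lmbs++;
}

static void __init count_drconf_memory(struct device_node *dn)
{
	/* dn is the /ibm,dynamic-reconfiguration-memory device node */
	walk_drmem_lmbs(dn, count_assigned_lmb);
}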
@@ -112,26 +112,25 @@ struct flag_info {

static const struct flag_info flag_array[] = {
	{
#ifdef CONFIG_PPC_BOOK3S_64
		.mask = _PAGE_PRIVILEGED,
		.val = 0,
#else
		.mask = _PAGE_USER,
		.mask = _PAGE_USER | _PAGE_PRIVILEGED,
		.val = _PAGE_USER,
#endif
		.set = "user",
		.clear = " ",
	}, {
#if _PAGE_RO == 0
		.mask = _PAGE_RW,
		.mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
		.val = _PAGE_RW,
#else
		.mask = _PAGE_RO,
		.val = 0,
#endif
		.set = "rw",
		.clear = "ro",
	}, {
		.mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
		.val = _PAGE_RO,
		.set = "ro",
	}, {
#if _PAGE_NA != 0
		.mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
		.val = _PAGE_RO,
		.set = "na",
	}, {
#endif
		.mask = _PAGE_EXEC,
		.val = _PAGE_EXEC,
		.set = " X ",
@@ -213,7 +212,7 @@ static const struct flag_info flag_array[] = {
		.val = H_PAGE_4K_PFN,
		.set = "4K_pfn",
	}, {
#endif
#else /* CONFIG_PPC_64K_PAGES */
		.mask = H_PAGE_F_GIX,
		.val = H_PAGE_F_GIX,
		.set = "f_gix",
@@ -224,14 +223,11 @@ static const struct flag_info flag_array[] = {
		.val = H_PAGE_F_SECOND,
		.set = "f_second",
	}, {
#endif /* CONFIG_PPC_64K_PAGES */
#endif
		.mask = _PAGE_SPECIAL,
		.val = _PAGE_SPECIAL,
		.set = "special",
	}, {
		.mask = _PAGE_SHARED,
		.val = _PAGE_SHARED,
		.set = "shared",
	}
};
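The masks above are widened to _PAGE_RW | _PAGE_RO | _PAGE_NA because the page-table dumper decides between .set and .clear by comparing the masked PTE bits against .val, so the "rw", "ro" and "na" entries must all mask the same three bits to stay mutually exclusive. A simplified sketch of that evaluation (the function name is hypothetical, not the file's actual helper):

static const char *decode_flag(u64 pte, const struct flag_info *flag)
{
	/* An entry matches when the masked PTE bits equal .val. */
	if ((pte & flag->mask) == flag->val)
		return flag->set;
	return flag->clear ? flag->clear : "";
}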
@@ -107,7 +107,8 @@ static bool store_updates_sp(struct pt_regs *regs)
 */

static int
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code,
		       int pkey)
{
	/*
	 * If we are in kernel mode, bail out with a SEGV, this will
@@ -117,17 +118,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
	if (!user_mode(regs))
		return SIGSEGV;

	_exception(SIGSEGV, regs, si_code, address);
	_exception_pkey(SIGSEGV, regs, si_code, address, pkey);

	return 0;
}

static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
{
	return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
	return __bad_area_nosemaphore(regs, address, SEGV_MAPERR, 0);
}

static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code,
		      int pkey)
{
	struct mm_struct *mm = current->mm;

@@ -137,17 +139,23 @@ static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
	 */
	up_read(&mm->mmap_sem);

	return __bad_area_nosemaphore(regs, address, si_code);
	return __bad_area_nosemaphore(regs, address, si_code, pkey);
}

static noinline int bad_area(struct pt_regs *regs, unsigned long address)
{
	return __bad_area(regs, address, SEGV_MAPERR);
	return __bad_area(regs, address, SEGV_MAPERR, 0);
}

static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
				   int pkey)
{
	return __bad_area_nosemaphore(regs, address, SEGV_PKUERR, pkey);
}

static noinline int bad_access(struct pt_regs *regs, unsigned long address)
{
	return __bad_area(regs, address, SEGV_ACCERR);
	return __bad_area(regs, address, SEGV_ACCERR, 0);
}

static int do_sigbus(struct pt_regs *regs, unsigned long address,
@@ -432,6 +440,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	if (error_code & DSISR_KEYFAULT)
		return bad_key_fault_exception(regs, address,
					       get_mm_addr_key(mm, address));

	/*
	 * We want to do this outside mmap_sem, because reading code around nip
	 * can result in fault, which will cause a deadlock when called with
@@ -503,6 +515,31 @@ good_area:
	 * the fault.
	 */
	fault = handle_mm_fault(vma, address, flags);

#ifdef CONFIG_PPC_MEM_KEYS
	/*
	 * if the HPTE is not hashed, hardware will not detect
	 * a key fault. Lets check if we failed because of a
	 * software detected key fault.
	 */
	if (unlikely(fault & VM_FAULT_SIGSEGV) &&
		!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
			is_exec, 0)) {
		/*
		 * The PGD-PDT...PMD-PTE tree may not have been fully setup.
		 * Hence we cannot walk the tree to locate the PTE, to locate
		 * the key. Hence let's use vma_pkey() to get the key; instead
		 * of get_mm_addr_key().
		 */
		int pkey = vma_pkey(vma);

		if (likely(pkey)) {
			up_read(&mm->mmap_sem);
			return bad_key_fault_exception(regs, address, pkey);
		}
	}
#endif /* CONFIG_PPC_MEM_KEYS */

	major |= fault & VM_FAULT_MAJOR;

	/*
@@ -576,7 +613,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)

	/* kernel has accessed a bad area */

	switch (regs->trap) {
	switch (TRAP(regs)) {
	case 0x300:
	case 0x380:
		printk(KERN_ALERT "Unable to handle kernel paging request for "
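Taken together, the fault.c changes let a protection-key violation reach user space as SEGV_PKUERR with the offending key attached, whether the key fault was reported by hardware (DSISR_KEYFAULT) or detected in software after handle_mm_fault(). A condensed, illustrative restatement of how a detected key fault is reported (not the commit's code, which is split across the helpers shown above; the function name is hypothetical):

/* Illustrative only: how a detected key fault is ultimately reported. */
static int report_key_fault(struct pt_regs *regs, unsigned long address, int pkey)
{
	if (!user_mode(regs))
		return SIGSEGV;	/* kernel-mode faults still escalate to SIGSEGV */

	_exception_pkey(SIGSEGV, regs, SEGV_PKUERR, address, pkey);
	return 0;
}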
@@ -20,6 +20,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
pte_t *ptep, unsigned long trap, unsigned long flags,
|
||||
int ssize, int subpg_prot)
|
||||
{
|
||||
real_pte_t rpte;
|
||||
unsigned long hpte_group;
|
||||
unsigned long rflags, pa;
|
||||
unsigned long old_pte, new_pte;
|
||||
@@ -54,6 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
* need to add in 0x1 if it's a read-only user page
|
||||
*/
|
||||
rflags = htab_convert_pte_flags(new_pte);
|
||||
rpte = __real_pte(__pte(old_pte), ptep);
|
||||
|
||||
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
|
||||
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
|
||||
@@ -64,13 +66,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
/*
|
||||
* There MIGHT be an HPTE for this pte
|
||||
*/
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
if (old_pte & H_PAGE_F_SECOND)
|
||||
hash = ~hash;
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
|
||||
unsigned long gslot = pte_get_hash_gslot(vpn, shift, ssize,
|
||||
rpte, 0);
|
||||
|
||||
if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
|
||||
if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_4K,
|
||||
MMU_PAGE_4K, ssize, flags) == -1)
|
||||
old_pte &= ~_PAGE_HPTEFLAGS;
|
||||
}
|
||||
@@ -118,8 +117,7 @@ repeat:
|
||||
return -1;
|
||||
}
|
||||
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
|
||||
new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
|
||||
(H_PAGE_F_SECOND | H_PAGE_F_GIX);
|
||||
new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
|
||||
}
|
||||
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
|
||||
return 0;
|
||||
|
@@ -15,34 +15,22 @@
|
||||
#include <linux/mm.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/mmu.h>
|
||||
|
||||
/*
|
||||
* Return true, if the entry has a slot value which
|
||||
* the software considers as invalid.
|
||||
*/
|
||||
static inline bool hpte_soft_invalid(unsigned long hidx)
|
||||
{
|
||||
return ((hidx & 0xfUL) == 0xfUL);
|
||||
}
|
||||
|
||||
/*
|
||||
* index from 0 - 15
|
||||
*/
|
||||
bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
|
||||
{
|
||||
unsigned long g_idx;
|
||||
unsigned long ptev = pte_val(rpte.pte);
|
||||
|
||||
g_idx = (ptev & H_PAGE_COMBO_VALID) >> H_PAGE_F_GIX_SHIFT;
|
||||
index = index >> 2;
|
||||
if (g_idx & (0x1 << index))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
* index from 0 - 15
|
||||
*/
|
||||
static unsigned long mark_subptegroup_valid(unsigned long ptev, unsigned long index)
|
||||
{
|
||||
unsigned long g_idx;
|
||||
|
||||
if (!(ptev & H_PAGE_COMBO))
|
||||
return ptev;
|
||||
index = index >> 2;
|
||||
g_idx = 0x1 << index;
|
||||
|
||||
return ptev | (g_idx << H_PAGE_F_GIX_SHIFT);
|
||||
return !(hpte_soft_invalid(__rpte_to_hidx(rpte, index)));
|
||||
}
|
||||
|
||||
int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
@@ -50,12 +38,11 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
int ssize, int subpg_prot)
|
||||
{
|
||||
real_pte_t rpte;
|
||||
unsigned long *hidxp;
|
||||
unsigned long hpte_group;
|
||||
unsigned int subpg_index;
|
||||
unsigned long rflags, pa, hidx;
|
||||
unsigned long rflags, pa;
|
||||
unsigned long old_pte, new_pte, subpg_pte;
|
||||
unsigned long vpn, hash, slot;
|
||||
unsigned long vpn, hash, slot, gslot;
|
||||
unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
|
||||
|
||||
/*
|
||||
@@ -116,8 +103,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
* On hash insert failure we use old pte value and we don't
|
||||
* want slot information there if we have a insert failure.
|
||||
*/
|
||||
old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
|
||||
new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
|
||||
old_pte &= ~H_PAGE_HASHPTE;
|
||||
new_pte &= ~H_PAGE_HASHPTE;
|
||||
goto htab_insert_hpte;
|
||||
}
|
||||
/*
|
||||
@@ -126,18 +113,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
if (__rpte_sub_valid(rpte, subpg_index)) {
|
||||
int ret;
|
||||
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
hidx = __rpte_to_hidx(rpte, subpg_index);
|
||||
if (hidx & _PTEIDX_SECONDARY)
|
||||
hash = ~hash;
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
slot += hidx & _PTEIDX_GROUP_IX;
|
||||
|
||||
ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
|
||||
gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte,
|
||||
subpg_index);
|
||||
ret = mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn,
|
||||
MMU_PAGE_4K, MMU_PAGE_4K,
|
||||
ssize, flags);
|
||||
|
||||
/*
|
||||
*if we failed because typically the HPTE wasn't really here
|
||||
* If we failed because typically the HPTE wasn't really here
|
||||
* we try an insertion.
|
||||
*/
|
||||
if (ret == -1)
|
||||
@@ -148,6 +131,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
}
|
||||
|
||||
htab_insert_hpte:
|
||||
|
||||
/*
|
||||
* Initialize all hidx entries to invalid value, the first time
|
||||
* the PTE is about to allocate a 4K HPTE.
|
||||
*/
|
||||
if (!(old_pte & H_PAGE_COMBO))
|
||||
rpte.hidx = INVALID_RPTE_HIDX;
|
||||
|
||||
/*
|
||||
* handle H_PAGE_4K_PFN case
|
||||
*/
|
||||
@@ -172,15 +163,39 @@ repeat:
|
||||
* Primary is full, try the secondary
|
||||
*/
|
||||
if (unlikely(slot == -1)) {
|
||||
bool soft_invalid;
|
||||
|
||||
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
|
||||
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
|
||||
rflags, HPTE_V_SECONDARY,
|
||||
MMU_PAGE_4K, MMU_PAGE_4K,
|
||||
ssize);
|
||||
if (slot == -1) {
|
||||
if (mftb() & 0x1)
|
||||
|
||||
soft_invalid = hpte_soft_invalid(slot);
|
||||
if (unlikely(soft_invalid)) {
|
||||
/*
|
||||
* We got a valid slot from a hardware point of view.
|
||||
* but we cannot use it, because we use this special
|
||||
* value; as defined by hpte_soft_invalid(), to track
|
||||
* invalid slots. We cannot use it. So invalidate it.
|
||||
*/
|
||||
gslot = slot & _PTEIDX_GROUP_IX;
|
||||
mmu_hash_ops.hpte_invalidate(hpte_group + gslot, vpn,
|
||||
MMU_PAGE_4K, MMU_PAGE_4K,
|
||||
ssize, 0);
|
||||
}
|
||||
|
||||
if (unlikely(slot == -1 || soft_invalid)) {
|
||||
/*
|
||||
* For soft invalid slot, let's ensure that we release a
|
||||
* slot from the primary, with the hope that we will
|
||||
* acquire that slot next time we try. This will ensure
|
||||
* that we do not get the same soft-invalid slot.
|
||||
*/
|
||||
if (soft_invalid || (mftb() & 0x1))
|
||||
hpte_group = ((hash & htab_hash_mask) *
|
||||
HPTES_PER_GROUP) & ~0x7UL;
|
||||
|
||||
mmu_hash_ops.hpte_remove(hpte_group);
|
||||
/*
|
||||
* FIXME!! Should be try the group from which we removed ?
|
||||
@@ -198,21 +213,10 @@ repeat:
|
||||
MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
|
||||
return -1;
|
||||
}
|
||||
/*
|
||||
* Insert slot number & secondary bit in PTE second half,
|
||||
* clear H_PAGE_BUSY and set appropriate HPTE slot bit
|
||||
* Since we have H_PAGE_BUSY set on ptep, we can be sure
|
||||
* nobody is undating hidx.
|
||||
*/
|
||||
hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
|
||||
rpte.hidx &= ~(0xfUL << (subpg_index << 2));
|
||||
*hidxp = rpte.hidx | (slot << (subpg_index << 2));
|
||||
new_pte = mark_subptegroup_valid(new_pte, subpg_index);
|
||||
new_pte |= H_PAGE_HASHPTE;
|
||||
/*
|
||||
* check __real_pte for details on matching smp_rmb()
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
|
||||
new_pte |= H_PAGE_HASHPTE;
|
||||
|
||||
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
|
||||
return 0;
|
||||
}
|
||||
@@ -221,6 +225,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
|
||||
unsigned long vsid, pte_t *ptep, unsigned long trap,
|
||||
unsigned long flags, int ssize)
|
||||
{
|
||||
real_pte_t rpte;
|
||||
unsigned long hpte_group;
|
||||
unsigned long rflags, pa;
|
||||
unsigned long old_pte, new_pte;
|
||||
@@ -257,6 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
|
||||
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
|
||||
|
||||
rflags = htab_convert_pte_flags(new_pte);
|
||||
rpte = __real_pte(__pte(old_pte), ptep);
|
||||
|
||||
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
|
||||
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
|
||||
@@ -264,16 +270,13 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
|
||||
|
||||
vpn = hpt_vpn(ea, vsid, ssize);
|
||||
if (unlikely(old_pte & H_PAGE_HASHPTE)) {
|
||||
unsigned long gslot;
|
||||
|
||||
/*
|
||||
* There MIGHT be an HPTE for this pte
|
||||
*/
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
if (old_pte & H_PAGE_F_SECOND)
|
||||
hash = ~hash;
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
|
||||
|
||||
if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
|
||||
gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
|
||||
if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K,
|
||||
MMU_PAGE_64K, ssize,
|
||||
flags) == -1)
|
||||
old_pte &= ~_PAGE_HPTEFLAGS;
|
||||
@@ -322,9 +325,9 @@ repeat:
|
||||
MMU_PAGE_64K, MMU_PAGE_64K, old_pte);
|
||||
return -1;
|
||||
}
|
||||
|
||||
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
|
||||
new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
|
||||
(H_PAGE_F_SECOND | H_PAGE_F_GIX);
|
||||
new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
|
||||
}
|
||||
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
|
||||
return 0;
|
||||
|
@@ -47,6 +47,103 @@
|
||||
|
||||
DEFINE_RAW_SPINLOCK(native_tlbie_lock);
|
||||
|
||||
static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
|
||||
{
|
||||
unsigned long rb;
|
||||
|
||||
rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
|
||||
|
||||
asm volatile("tlbiel %0" : : "r" (rb));
|
||||
}
|
||||
|
||||
/*
|
||||
* tlbiel instruction for hash, set invalidation
|
||||
* i.e., r=1 and is=01 or is=10 or is=11
|
||||
*/
|
||||
static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
|
||||
unsigned int pid,
|
||||
unsigned int ric, unsigned int prs)
|
||||
{
|
||||
unsigned long rb;
|
||||
unsigned long rs;
|
||||
unsigned int r = 0; /* hash format */
|
||||
|
||||
rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
|
||||
rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
|
||||
|
||||
asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
|
||||
: : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
|
||||
static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
|
||||
{
|
||||
unsigned int set;
|
||||
|
||||
asm volatile("ptesync": : :"memory");
|
||||
|
||||
for (set = 0; set < num_sets; set++)
|
||||
tlbiel_hash_set_isa206(set, is);
|
||||
|
||||
asm volatile("ptesync": : :"memory");
|
||||
}
|
||||
|
||||
static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
|
||||
{
|
||||
unsigned int set;
|
||||
|
||||
asm volatile("ptesync": : :"memory");
|
||||
|
||||
/*
|
||||
* Flush the first set of the TLB, and any caching of partition table
|
||||
* entries. Then flush the remaining sets of the TLB. Hash mode uses
|
||||
* partition scoped TLB translations.
|
||||
*/
|
||||
tlbiel_hash_set_isa300(0, is, 0, 2, 0);
|
||||
for (set = 1; set < num_sets; set++)
|
||||
tlbiel_hash_set_isa300(set, is, 0, 0, 0);
|
||||
|
||||
/*
|
||||
* Now invalidate the process table cache.
|
||||
*
|
||||
* From ISA v3.0B p. 1078:
|
||||
* The following forms are invalid.
|
||||
* * PRS=1, R=0, and RIC!=2 (The only process-scoped
|
||||
* HPT caching is of the Process Table.)
|
||||
*/
|
||||
tlbiel_hash_set_isa300(0, is, 0, 2, 1);
|
||||
|
||||
asm volatile("ptesync": : :"memory");
|
||||
}
|
||||
|
||||
void hash__tlbiel_all(unsigned int action)
|
||||
{
|
||||
unsigned int is;
|
||||
|
||||
switch (action) {
|
||||
case TLB_INVAL_SCOPE_GLOBAL:
|
||||
is = 3;
|
||||
break;
|
||||
case TLB_INVAL_SCOPE_LPID:
|
||||
is = 2;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (early_cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
|
||||
else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
|
||||
tlbiel_all_isa206(POWER8_TLB_SETS, is);
|
||||
else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
|
||||
tlbiel_all_isa206(POWER7_TLB_SETS, is);
|
||||
else
|
||||
WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
|
||||
|
||||
asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
|
||||
}
|
||||
|
||||
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
|
||||
int apsize, int ssize)
|
||||
{
|
||||
|
@@ -36,6 +36,7 @@
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/libfdt.h>
|
||||
#include <linux/pkeys.h>
|
||||
|
||||
#include <asm/debugfs.h>
|
||||
#include <asm/processor.h>
|
||||
@@ -232,6 +233,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
|
||||
*/
|
||||
rflags |= HPTE_R_M;
|
||||
|
||||
rflags |= pte_to_hpte_pkey_bits(pteflags);
|
||||
return rflags;
|
||||
}
|
||||
|
||||
@@ -606,7 +608,7 @@ static void init_hpte_page_sizes(void)
|
||||
continue; /* not a supported page size */
|
||||
for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
|
||||
penc = mmu_psize_defs[bp].penc[ap];
|
||||
if (penc == -1)
|
||||
if (penc == -1 || !mmu_psize_defs[ap].shift)
|
||||
continue;
|
||||
shift = mmu_psize_defs[ap].shift - LP_SHIFT;
|
||||
if (shift <= 0)
|
||||
@@ -772,7 +774,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
|
||||
int rc;
|
||||
|
||||
rc = mmu_hash_ops.resize_hpt(target_hpt_shift);
|
||||
if (rc)
|
||||
if (rc && (rc != -ENODEV))
|
||||
printk(KERN_WARNING
|
||||
"Unable to resize hash page table to target order %d: %d\n",
|
||||
target_hpt_shift, rc);
|
||||
@@ -979,8 +981,9 @@ void __init hash__early_init_devtree(void)
|
||||
|
||||
void __init hash__early_init_mmu(void)
|
||||
{
|
||||
#ifndef CONFIG_PPC_64K_PAGES
|
||||
/*
|
||||
* We have code in __hash_page_64K() and elsewhere, which assumes it can
|
||||
* We have code in __hash_page_4K() and elsewhere, which assumes it can
|
||||
* do the following:
|
||||
* new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX);
|
||||
*
|
||||
@@ -991,6 +994,7 @@ void __init hash__early_init_mmu(void)
|
||||
* with a BUILD_BUG_ON().
|
||||
*/
|
||||
BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul << (H_PAGE_F_GIX_SHIFT + 3)));
|
||||
#endif /* CONFIG_PPC_64K_PAGES */
|
||||
|
||||
htab_init_page_sizes();
|
||||
|
||||
@@ -1049,6 +1053,10 @@ void __init hash__early_init_mmu(void)
|
||||
pr_info("Initializing hash mmu with SLB\n");
|
||||
/* Initialize SLB management */
|
||||
slb_initialize();
|
||||
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_206)
|
||||
&& cpu_has_feature(CPU_FTR_HVMODE))
|
||||
tlbiel_all();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -1068,6 +1076,10 @@ void hash__early_init_mmu_secondary(void)
|
||||
}
|
||||
/* Initialize SLB */
|
||||
slb_initialize();
|
||||
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_206)
|
||||
&& cpu_has_feature(CPU_FTR_HVMODE))
|
||||
tlbiel_all();
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
@@ -1569,6 +1581,30 @@ out_exit:
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PPC_MEM_KEYS
|
||||
/*
|
||||
* Return the protection key associated with the given address and the
|
||||
* mm_struct.
|
||||
*/
|
||||
u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
|
||||
{
|
||||
pte_t *ptep;
|
||||
u16 pkey = 0;
|
||||
unsigned long flags;
|
||||
|
||||
if (!mm || !mm->pgd)
|
||||
return 0;
|
||||
|
||||
local_irq_save(flags);
|
||||
ptep = find_linux_pte(mm->pgd, address, NULL, NULL);
|
||||
if (ptep)
|
||||
pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep)));
|
||||
local_irq_restore(flags);
|
||||
|
||||
return pkey;
|
||||
}
|
||||
#endif /* CONFIG_PPC_MEM_KEYS */
|
||||
|
||||
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
||||
static inline void tm_flush_hash_page(int local)
|
||||
{
|
||||
@@ -1592,29 +1628,42 @@ static inline void tm_flush_hash_page(int local)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Return the global hash slot, corresponding to the given PTE, which contains
|
||||
* the HPTE.
|
||||
*/
|
||||
unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
|
||||
int ssize, real_pte_t rpte, unsigned int subpg_index)
|
||||
{
|
||||
unsigned long hash, gslot, hidx;
|
||||
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
hidx = __rpte_to_hidx(rpte, subpg_index);
|
||||
if (hidx & _PTEIDX_SECONDARY)
|
||||
hash = ~hash;
|
||||
gslot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
gslot += hidx & _PTEIDX_GROUP_IX;
|
||||
return gslot;
|
||||
}
|
||||
|
||||
/* WARNING: This is called from hash_low_64.S, if you change this prototype,
|
||||
* do not forget to update the assembly call site !
|
||||
*/
|
||||
void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
|
||||
unsigned long flags)
|
||||
{
|
||||
unsigned long hash, index, shift, hidx, slot;
|
||||
unsigned long index, shift, gslot;
|
||||
int local = flags & HPTE_LOCAL_UPDATE;
|
||||
|
||||
DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
|
||||
pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
hidx = __rpte_to_hidx(pte, index);
|
||||
if (hidx & _PTEIDX_SECONDARY)
|
||||
hash = ~hash;
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
slot += hidx & _PTEIDX_GROUP_IX;
|
||||
DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
|
||||
gslot = pte_get_hash_gslot(vpn, shift, ssize, pte, index);
|
||||
DBG_LOW(" sub %ld: gslot=%lx\n", index, gslot);
|
||||
/*
|
||||
* We use same base page size and actual psize, because we don't
|
||||
* use these functions for hugepage
|
||||
*/
|
||||
mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize,
|
||||
mmu_hash_ops.hpte_invalidate(gslot, vpn, psize, psize,
|
||||
ssize, local);
|
||||
} pte_iterate_hashed_end();
|
||||
|
||||
@@ -1825,16 +1874,24 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
|
||||
*/
|
||||
BUG_ON(first_memblock_base != 0);
|
||||
|
||||
/* On LPAR systems, the first entry is our RMA region,
|
||||
* non-LPAR 64-bit hash MMU systems don't have a limitation
|
||||
* on real mode access, but using the first entry works well
|
||||
* enough. We also clamp it to 1G to avoid some funky things
|
||||
* such as RTAS bugs etc...
|
||||
/*
|
||||
* On virtualized systems the first entry is our RMA region aka VRMA,
|
||||
* non-virtualized 64-bit hash MMU systems don't have a limitation
|
||||
* on real mode access.
|
||||
*
|
||||
* For guests on platforms before POWER9, we clamp the it limit to 1G
|
||||
* to avoid some funky things such as RTAS bugs etc...
|
||||
*/
|
||||
ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
|
||||
if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
|
||||
ppc64_rma_size = first_memblock_size;
|
||||
if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);
|
||||
|
||||
/* Finally limit subsequent allocations */
|
||||
memblock_set_current_limit(ppc64_rma_size);
|
||||
/* Finally limit subsequent allocations */
|
||||
memblock_set_current_limit(ppc64_rma_size);
|
||||
} else {
|
||||
ppc64_rma_size = ULONG_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
@@ -23,6 +23,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
pte_t *ptep, unsigned long trap, unsigned long flags,
|
||||
int ssize, unsigned int shift, unsigned int mmu_psize)
|
||||
{
|
||||
real_pte_t rpte;
|
||||
unsigned long vpn;
|
||||
unsigned long old_pte, new_pte;
|
||||
unsigned long rflags, pa, sz;
|
||||
@@ -62,6 +63,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
|
||||
|
||||
rflags = htab_convert_pte_flags(new_pte);
|
||||
rpte = __real_pte(__pte(old_pte), ptep);
|
||||
|
||||
sz = ((1UL) << shift);
|
||||
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
|
||||
@@ -72,15 +74,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
/* Check if pte already has an hpte (case 2) */
|
||||
if (unlikely(old_pte & H_PAGE_HASHPTE)) {
|
||||
/* There MIGHT be an HPTE for this pte */
|
||||
unsigned long hash, slot;
|
||||
unsigned long gslot;
|
||||
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
if (old_pte & H_PAGE_F_SECOND)
|
||||
hash = ~hash;
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
|
||||
|
||||
if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize,
|
||||
gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
|
||||
if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize,
|
||||
mmu_psize, ssize, flags) == -1)
|
||||
old_pte &= ~_PAGE_HPTEFLAGS;
|
||||
}
|
||||
@@ -107,8 +104,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
return -1;
|
||||
}
|
||||
|
||||
new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
|
||||
(H_PAGE_F_SECOND | H_PAGE_F_GIX);
|
||||
new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -96,7 +96,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
|
||||
*hpdp = __hugepd(__pa(new) |
|
||||
(shift_to_mmu_psize(pshift) << 2));
|
||||
#elif defined(CONFIG_PPC_8xx)
|
||||
*hpdp = __hugepd(__pa(new) |
|
||||
*hpdp = __hugepd(__pa(new) | _PMD_USER |
|
||||
(pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M :
|
||||
_PMD_PAGE_512K) | _PMD_PRESENT);
|
||||
#else
|
||||
@@ -752,7 +752,7 @@ void flush_dcache_icache_hugepage(struct page *page)
|
||||
* So long as we atomically load page table pointers we are safe against teardown,
|
||||
* we can follow the address down to the the page and take a ref on it.
|
||||
* This function need to be called with interrupts disabled. We use this variant
|
||||
* when we have MSR[EE] = 0 but the paca->soft_enabled = 1
|
||||
* when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
|
||||
*/
|
||||
pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
|
||||
bool *is_thp, unsigned *hpage_shift)
|
||||
@@ -855,9 +855,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
||||
|
||||
pte = READ_ONCE(*ptep);
|
||||
|
||||
if (!pte_present(pte) || !pte_read(pte))
|
||||
return 0;
|
||||
if (write && !pte_write(pte))
|
||||
if (!pte_access_permitted(pte, write))
|
||||
return 0;
|
||||
|
||||
/* hugepages are never "special" */
|
||||
|
@@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
|
||||
vmemmap_list = vmem_back;
|
||||
}
|
||||
|
||||
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
|
||||
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
|
||||
struct vmem_altmap *altmap)
|
||||
{
|
||||
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
|
||||
|
||||
@@ -193,17 +194,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
|
||||
pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
|
||||
|
||||
for (; start < end; start += page_size) {
|
||||
struct vmem_altmap *altmap;
|
||||
void *p;
|
||||
int rc;
|
||||
|
||||
if (vmemmap_populated(start, page_size))
|
||||
continue;
|
||||
|
||||
/* altmap lookups only work at section boundaries */
|
||||
altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
|
||||
|
||||
p = __vmemmap_alloc_block_buf(page_size, node, altmap);
|
||||
if (altmap)
|
||||
p = altmap_alloc_block_buf(page_size, altmap);
|
||||
else
|
||||
p = vmemmap_alloc_block_buf(page_size, node);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -214,9 +214,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
|
||||
|
||||
rc = vmemmap_create_mapping(start, page_size, __pa(p));
|
||||
if (rc < 0) {
|
||||
pr_warning(
|
||||
"vmemmap_populate: Unable to create vmemmap mapping: %d\n",
|
||||
rc);
|
||||
pr_warn("%s: Unable to create vmemmap mapping: %d\n",
|
||||
__func__, rc);
|
||||
return -EFAULT;
|
||||
}
|
||||
}
|
||||
@@ -257,7 +256,8 @@ static unsigned long vmemmap_list_free(unsigned long start)
|
||||
return vmem_back->phys;
|
||||
}
|
||||
|
||||
void __ref vmemmap_free(unsigned long start, unsigned long end)
|
||||
void __ref vmemmap_free(unsigned long start, unsigned long end,
|
||||
struct vmem_altmap *altmap)
|
||||
{
|
||||
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
|
||||
unsigned long page_order = get_order(page_size);
|
||||
@@ -268,7 +268,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
|
||||
|
||||
for (; start < end; start += page_size) {
|
||||
unsigned long nr_pages, addr;
|
||||
struct vmem_altmap *altmap;
|
||||
struct page *section_base;
|
||||
struct page *page;
|
||||
|
||||
@@ -288,7 +287,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
|
||||
section_base = pfn_to_page(vmemmap_section_start(start));
|
||||
nr_pages = 1 << page_order;
|
||||
|
||||
altmap = to_vmem_altmap((unsigned long) section_base);
|
||||
if (altmap) {
|
||||
vmem_altmap_free(altmap, nr_pages);
|
||||
} else if (PageReserved(page)) {
|
||||
|
@@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
|
||||
int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
|
||||
bool want_memblock)
|
||||
{
|
||||
unsigned long start_pfn = start >> PAGE_SHIFT;
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
@@ -138,21 +139,19 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
|
||||
start = (unsigned long)__va(start);
|
||||
rc = create_section_mapping(start, start + size);
|
||||
if (rc) {
|
||||
pr_warning(
|
||||
"Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
|
||||
pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
|
||||
start, start + size, rc);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return __add_pages(nid, start_pfn, nr_pages, want_memblock);
|
||||
return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
int arch_remove_memory(u64 start, u64 size)
|
||||
int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
|
||||
{
|
||||
unsigned long start_pfn = start >> PAGE_SHIFT;
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
struct vmem_altmap *altmap;
|
||||
struct page *page;
|
||||
int ret;
|
||||
|
||||
@@ -161,11 +160,10 @@ int arch_remove_memory(u64 start, u64 size)
|
||||
* when querying the zone.
|
||||
*/
|
||||
page = pfn_to_page(start_pfn);
|
||||
altmap = to_vmem_altmap((unsigned long) page);
|
||||
if (altmap)
|
||||
page += vmem_altmap_offset(altmap);
|
||||
|
||||
ret = __remove_pages(page_zone(page), start_pfn, nr_pages);
|
||||
ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@@ -16,6 +16,7 @@
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pkeys.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/export.h>
|
||||
@@ -118,6 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm)
|
||||
|
||||
subpage_prot_init_new_context(mm);
|
||||
|
||||
pkey_mm_init(mm);
|
||||
return index;
|
||||
}
|
||||
|
||||
|
@@ -40,6 +40,7 @@
|
||||
#include <asm/hvcall.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/drmem.h>
|
||||
|
||||
static int numa_enabled = 1;
|
||||
|
||||
@@ -179,21 +180,6 @@ static const __be32 *of_get_associativity(struct device_node *dev)
|
||||
return of_get_property(dev, "ibm,associativity", NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the property linux,drconf-usable-memory if
|
||||
* it exists (the property exists only in kexec/kdump kernels,
|
||||
* added by kexec-tools)
|
||||
*/
|
||||
static const __be32 *of_get_usable_memory(struct device_node *memory)
|
||||
{
|
||||
const __be32 *prop;
|
||||
u32 len;
|
||||
prop = of_get_property(memory, "linux,drconf-usable-memory", &len);
|
||||
if (!prop || len < sizeof(unsigned int))
|
||||
return NULL;
|
||||
return prop;
|
||||
}
|
||||
|
||||
int __node_distance(int a, int b)
|
||||
{
|
||||
int i;
|
||||
@@ -387,69 +373,6 @@ static unsigned long read_n_cells(int n, const __be32 **buf)
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the next memblock list entry from the ibm,dynamic-memory property
|
||||
* and return the information in the provided of_drconf_cell structure.
|
||||
*/
|
||||
static void read_drconf_cell(struct of_drconf_cell *drmem, const __be32 **cellp)
|
||||
{
|
||||
const __be32 *cp;
|
||||
|
||||
drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);
|
||||
|
||||
cp = *cellp;
|
||||
drmem->drc_index = of_read_number(cp, 1);
|
||||
drmem->reserved = of_read_number(&cp[1], 1);
|
||||
drmem->aa_index = of_read_number(&cp[2], 1);
|
||||
drmem->flags = of_read_number(&cp[3], 1);
|
||||
|
||||
*cellp = cp + 4;
|
||||
}
|
||||
|
||||
/*
|
||||
* Retrieve and validate the ibm,dynamic-memory property of the device tree.
|
||||
*
|
||||
* The layout of the ibm,dynamic-memory property is a number N of memblock
|
||||
* list entries followed by N memblock list entries. Each memblock list entry
|
||||
* contains information as laid out in the of_drconf_cell struct above.
|
||||
*/
|
||||
static int of_get_drconf_memory(struct device_node *memory, const __be32 **dm)
|
||||
{
|
||||
const __be32 *prop;
|
||||
u32 len, entries;
|
||||
|
||||
prop = of_get_property(memory, "ibm,dynamic-memory", &len);
|
||||
if (!prop || len < sizeof(unsigned int))
|
||||
return 0;
|
||||
|
||||
entries = of_read_number(prop++, 1);
|
||||
|
||||
/* Now that we know the number of entries, revalidate the size
|
||||
* of the property read in to ensure we have everything
|
||||
*/
|
||||
if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
|
||||
return 0;
|
||||
|
||||
*dm = prop;
|
||||
return entries;
|
||||
}
|
||||
|
||||
/*
|
||||
* Retrieve and validate the ibm,lmb-size property for drconf memory
|
||||
* from the device tree.
|
||||
*/
|
||||
static u64 of_get_lmb_size(struct device_node *memory)
|
||||
{
|
||||
const __be32 *prop;
|
||||
u32 len;
|
||||
|
||||
prop = of_get_property(memory, "ibm,lmb-size", &len);
|
||||
if (!prop || len < sizeof(unsigned int))
|
||||
return 0;
|
||||
|
||||
return read_n_cells(n_mem_size_cells, &prop);
|
||||
}
|
||||
|
||||
struct assoc_arrays {
|
||||
u32 n_arrays;
|
||||
u32 array_sz;
|
||||
@@ -466,19 +389,27 @@ struct assoc_arrays {
|
||||
* indicating the size of each associativity array, followed by a list
|
||||
* of N associativity arrays.
|
||||
*/
|
||||
static int of_get_assoc_arrays(struct device_node *memory,
|
||||
struct assoc_arrays *aa)
|
||||
static int of_get_assoc_arrays(struct assoc_arrays *aa)
|
||||
{
|
||||
struct device_node *memory;
|
||||
const __be32 *prop;
|
||||
u32 len;
|
||||
|
||||
prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
|
||||
if (!prop || len < 2 * sizeof(unsigned int))
|
||||
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
|
||||
if (!memory)
|
||||
return -1;
|
||||
|
||||
prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
|
||||
if (!prop || len < 2 * sizeof(unsigned int)) {
|
||||
of_node_put(memory);
|
||||
return -1;
|
||||
}
|
||||
|
||||
aa->n_arrays = of_read_number(prop++, 1);
|
||||
aa->array_sz = of_read_number(prop++, 1);
|
||||
|
||||
of_node_put(memory);
|
||||
|
||||
/* Now that we know the number of arrays and size of each array,
|
||||
* revalidate the size of the property read in.
|
||||
*/
|
||||
@@ -493,26 +424,30 @@ static int of_get_assoc_arrays(struct device_node *memory,
|
||||
* This is like of_node_to_nid_single() for memory represented in the
|
||||
* ibm,dynamic-reconfiguration-memory node.
|
||||
*/
|
||||
static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
|
||||
struct assoc_arrays *aa)
|
||||
static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
|
||||
{
|
||||
struct assoc_arrays aa = { .arrays = NULL };
|
||||
int default_nid = 0;
|
||||
int nid = default_nid;
|
||||
int index;
|
||||
int rc, index;
|
||||
|
||||
if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
|
||||
!(drmem->flags & DRCONF_MEM_AI_INVALID) &&
|
||||
drmem->aa_index < aa->n_arrays) {
|
||||
index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
|
||||
nid = of_read_number(&aa->arrays[index], 1);
|
||||
rc = of_get_assoc_arrays(&aa);
|
||||
if (rc)
|
||||
return default_nid;
|
||||
|
||||
if (min_common_depth > 0 && min_common_depth <= aa.array_sz &&
|
||||
!(lmb->flags & DRCONF_MEM_AI_INVALID) &&
|
||||
lmb->aa_index < aa.n_arrays) {
|
||||
index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
|
||||
nid = of_read_number(&aa.arrays[index], 1);
|
||||
|
||||
if (nid == 0xffff || nid >= MAX_NUMNODES)
|
||||
nid = default_nid;
|
||||
|
||||
if (nid > 0) {
|
||||
index = drmem->aa_index * aa->array_sz;
|
||||
index = lmb->aa_index * aa.array_sz;
|
||||
initialize_distance_lookup_table(nid,
|
||||
&aa->arrays[index]);
|
||||
&aa.arrays[index]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -551,7 +486,7 @@ static int numa_setup_cpu(unsigned long lcpu)
|
||||
nid = of_node_to_nid_single(cpu);
|
||||
|
||||
out_present:
|
||||
if (nid < 0 || !node_online(nid))
|
||||
if (nid < 0 || !node_possible(nid))
|
||||
nid = first_online_node;
|
||||
|
||||
map_cpu_to_node(lcpu, nid);
|
||||
@@ -645,67 +580,48 @@ static inline int __init read_usm_ranges(const __be32 **usm)
* Extract NUMA information from the ibm,dynamic-reconfiguration-memory
* node. This assumes n_mem_{addr,size}_cells have been set.
*/
static void __init parse_drconf_memory(struct device_node *memory)
static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
const __be32 **usm)
{
const __be32 *uninitialized_var(dm), *usm;
unsigned int n, rc, ranges, is_kexec_kdump = 0;
unsigned long lmb_size, base, size, sz;
unsigned int ranges, is_kexec_kdump = 0;
unsigned long base, size, sz;
int nid;
struct assoc_arrays aa = { .arrays = NULL };

n = of_get_drconf_memory(memory, &dm);
if (!n)
/*
* Skip this block if the reserved bit is set in flags (0x80)
* or if the block is not assigned to this partition (0x8)
*/
if ((lmb->flags & DRCONF_MEM_RESERVED)
|| !(lmb->flags & DRCONF_MEM_ASSIGNED))
return;

lmb_size = of_get_lmb_size(memory);
if (!lmb_size)
return;

rc = of_get_assoc_arrays(memory, &aa);
if (rc)
return;

/* check if this is a kexec/kdump kernel */
usm = of_get_usable_memory(memory);
if (usm != NULL)
if (*usm)
is_kexec_kdump = 1;

for (; n != 0; --n) {
struct of_drconf_cell drmem;
base = lmb->base_addr;
size = drmem_lmb_size();
ranges = 1;

read_drconf_cell(&drmem, &dm);

/* skip this block if the reserved bit is set in flags (0x80)
or if the block is not assigned to this partition (0x8) */
if ((drmem.flags & DRCONF_MEM_RESERVED)
|| !(drmem.flags & DRCONF_MEM_ASSIGNED))
continue;

base = drmem.base_addr;
size = lmb_size;
ranges = 1;

if (is_kexec_kdump) {
ranges = read_usm_ranges(&usm);
if (!ranges) /* there are no (base, size) tuples */
continue;
}
do {
if (is_kexec_kdump) {
base = read_n_cells(n_mem_addr_cells, &usm);
size = read_n_cells(n_mem_size_cells, &usm);
}
nid = of_drconf_to_nid_single(&drmem, &aa);
fake_numa_create_new_node(
((base + size) >> PAGE_SHIFT),
&nid);
node_set_online(nid);
sz = numa_enforce_memory_limit(base, size);
if (sz)
memblock_set_node(base, sz,
&memblock.memory, nid);
} while (--ranges);
if (is_kexec_kdump) {
ranges = read_usm_ranges(usm);
if (!ranges) /* there are no (base, size) tuples */
return;
}

do {
if (is_kexec_kdump) {
base = read_n_cells(n_mem_addr_cells, usm);
size = read_n_cells(n_mem_size_cells, usm);
}

nid = of_drconf_to_nid_single(lmb);
fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
&nid);
node_set_online(nid);
sz = numa_enforce_memory_limit(base, size);
if (sz)
memblock_set_node(base, sz, &memblock.memory, nid);
} while (--ranges);
}

static int __init parse_numa_properties(void)
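Illustration (not part of the patch): numa_setup_drmem_lmb() is now a callback invoked once per LMB by walk_drmem_lmbs(). The standalone mock below imitates only that control flow with invented types and data; it is not the kernel API (the real callback also receives a usable-memory cursor). The 0x80/0x8 flag values are the ones named in the comment above.

#include <stdio.h>

#define DRCONF_MEM_ASSIGNED 0x00000008
#define DRCONF_MEM_RESERVED 0x00000080

struct mock_lmb {
        unsigned long long base_addr;
        unsigned int flags;
};

/* Imitates walk_drmem_lmbs(): call func once per LMB. */
static void mock_walk(struct mock_lmb *lmbs, int n,
                      void (*func)(struct mock_lmb *))
{
        for (int i = 0; i < n; i++)
                func(&lmbs[i]);
}

/* Imitates numa_setup_drmem_lmb(): skip reserved/unassigned blocks. */
static void setup_one(struct mock_lmb *lmb)
{
        if ((lmb->flags & DRCONF_MEM_RESERVED) ||
            !(lmb->flags & DRCONF_MEM_ASSIGNED))
                return;
        printf("would online LMB at 0x%llx\n", lmb->base_addr);
}

int main(void)
{
        struct mock_lmb lmbs[] = {
                { 0x00000000, DRCONF_MEM_ASSIGNED },
                { 0x10000000, DRCONF_MEM_RESERVED },
                { 0x20000000, DRCONF_MEM_ASSIGNED },
        };
        mock_walk(lmbs, 3, setup_one);
        return 0;
}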
@@ -800,8 +716,10 @@ new_range:
* ibm,dynamic-reconfiguration-memory node.
*/
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (memory)
parse_drconf_memory(memory);
if (memory) {
walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
of_node_put(memory);
}

return 0;
}
@@ -892,6 +810,32 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
NODE_DATA(nid)->node_spanned_pages = spanned_pages;
}

static void __init find_possible_nodes(void)
{
struct device_node *rtas;
u32 numnodes, i;

if (min_common_depth <= 0)
return;

rtas = of_find_node_by_path("/rtas");
if (!rtas)
return;

if (of_property_read_u32_index(rtas,
"ibm,max-associativity-domains",
min_common_depth, &numnodes))
goto out;

for (i = 0; i < numnodes; i++) {
if (!node_possible(i))
node_set(i, node_possible_map);
}

out:
of_node_put(rtas);
}

void __init initmem_init(void)
{
int nid, cpu;
@@ -905,12 +849,15 @@ void __init initmem_init(void)
memblock_dump_all();

/*
* Reduce the possible NUMA nodes to the online NUMA nodes,
* since we do not support node hotplug. This ensures that we
* lower the maximum NUMA node ID to what is actually present.
* Modify the set of possible NUMA nodes to reflect information
* available about the set of online nodes, and the set of nodes
* that we expect to make use of for this platform's affinity
* calculations.
*/
nodes_and(node_possible_map, node_possible_map, node_online_map);

find_possible_nodes();

for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
@@ -979,43 +926,26 @@ early_param("topology_updates", early_topology_updates);
* memory represented in the device tree by the property
* ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
*/
static int hot_add_drconf_scn_to_nid(struct device_node *memory,
unsigned long scn_addr)
static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
{
const __be32 *dm;
unsigned int drconf_cell_cnt, rc;
struct drmem_lmb *lmb;
unsigned long lmb_size;
struct assoc_arrays aa;
int nid = -1;

drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
if (!drconf_cell_cnt)
return -1;

lmb_size = of_get_lmb_size(memory);
if (!lmb_size)
return -1;

rc = of_get_assoc_arrays(memory, &aa);
if (rc)
return -1;

for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
struct of_drconf_cell drmem;

read_drconf_cell(&drmem, &dm);
lmb_size = drmem_lmb_size();

for_each_drmem_lmb(lmb) {
/* skip this block if it is reserved or not assigned to
* this partition */
if ((drmem.flags & DRCONF_MEM_RESERVED)
|| !(drmem.flags & DRCONF_MEM_ASSIGNED))
if ((lmb->flags & DRCONF_MEM_RESERVED)
|| !(lmb->flags & DRCONF_MEM_ASSIGNED))
continue;

if ((scn_addr < drmem.base_addr)
|| (scn_addr >= (drmem.base_addr + lmb_size)))
if ((scn_addr < lmb->base_addr)
|| (scn_addr >= (lmb->base_addr + lmb_size)))
continue;

nid = of_drconf_to_nid_single(&drmem, &aa);
nid = of_drconf_to_nid_single(lmb);
break;
}
@@ -1080,7 +1010,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)

memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (memory) {
nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
nid = hot_add_drconf_scn_to_nid(scn_addr);
of_node_put(memory);
} else {
nid = hot_add_node_scn_to_nid(scn_addr);
@@ -1096,11 +1026,7 @@ static u64 hot_add_drconf_memory_max(void)
{
struct device_node *memory = NULL;
struct device_node *dn = NULL;
unsigned int drconf_cell_cnt = 0;
u64 lmb_size = 0;
const __be32 *dm = NULL;
const __be64 *lrdr = NULL;
struct of_drconf_cell drmem;

dn = of_find_node_by_path("/rtas");
if (dn) {
@@ -1112,14 +1038,8 @@ static u64 hot_add_drconf_memory_max(void)

memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (memory) {
drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
lmb_size = of_get_lmb_size(memory);

/* Advance to the last cell, each cell has 6 32 bit integers */
dm += (drconf_cell_cnt - 1) * 6;
read_drconf_cell(&drmem, &dm);
of_node_put(memory);
return drmem.base_addr + lmb_size;
return drmem_lmb_memory_max();
}
return 0;
}
@@ -1278,6 +1198,42 @@ static long vphn_get_associativity(unsigned long cpu,
return rc;
}

int find_and_online_cpu_nid(int cpu)
{
__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
int new_nid;

/* Use associativity from first thread for all siblings */
vphn_get_associativity(cpu, associativity);
new_nid = associativity_to_nid(associativity);
if (new_nid < 0 || !node_possible(new_nid))
new_nid = first_online_node;

if (NODE_DATA(new_nid) == NULL) {
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Need to ensure that NODE_DATA is initialized for a node from
* available memory (see memblock_alloc_try_nid). If unable to
* init the node, then default to nearest node that has memory
* installed.
*/
if (try_online_node(new_nid))
new_nid = first_online_node;
#else
/*
* Default to using the nearest node that has memory installed.
* Otherwise, it would be necessary to patch the kernel MM code
* to deal with more memoryless-node error conditions.
*/
new_nid = first_online_node;
#endif
}

pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__,
cpu, new_nid);
return new_nid;
}

/*
* Update the CPU maps and sysfs entries for a single CPU when its NUMA
* characteristics change. This function doesn't perform any locking and is
@@ -1345,7 +1301,6 @@ int numa_update_cpu_topology(bool cpus_locked)
{
unsigned int cpu, sibling, changed = 0;
struct topology_update_data *updates, *ud;
__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
cpumask_t updated_cpus;
struct device *dev;
int weight, new_nid, i = 0;
@@ -1383,11 +1338,7 @@ int numa_update_cpu_topology(bool cpus_locked)
continue;
}

/* Use associativity from first thread for all siblings */
vphn_get_associativity(cpu, associativity);
new_nid = associativity_to_nid(associativity);
if (new_nid < 0 || !node_online(new_nid))
new_nid = first_online_node;
new_nid = find_and_online_cpu_nid(cpu);

if (new_nid == numa_cpu_lookup_table[cpu]) {
cpumask_andnot(&cpu_associativity_changes_mask,
@@ -90,16 +90,19 @@ void serialize_against_pte_lookup(struct mm_struct *mm)
* We use this to invalidate a pmdp entry before switching from a
* hugepte to regular pmd entry.
*/
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
unsigned long old_pmd;

old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
/*
* This ensures that generic code that relies on IRQ disabling
* to prevent a parallel THP split works as expected.
*/
serialize_against_pte_lookup(vma->vm_mm);
return __pmd(old_pmd);
}

static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
@@ -296,28 +296,6 @@ pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
return pgtable;
}

void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
VM_BUG_ON(pmd_devmap(*pmdp));

/*
* We can't mark the pmd none here, because that will cause a race
* against exit_mmap. We need to keep the pmd marked TRANS HUGE while
* we split, but at the same time we want the rest of the ppc64 code
* not to insert a hash pte on this, because we will be modifying
* the deposited pgtable in the caller of this function. Hence
* clear the _PAGE_USER so that we move the fault handling to
* a higher level function which will serialize against the ptl.
* We need to flush existing hash pte entries here even though
* the translation is still valid, because we will withdraw
* the pgtable_t after this.
*/
pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
}

/*
* A linux hugepage PMD was changed and the corresponding hash table entries
* need to be flushed.
@@ -579,6 +579,9 @@ void __init radix__early_init_mmu(void)

radix_init_iamr();
radix_init_pgtable();

if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}

void radix__early_init_mmu_secondary(void)
@@ -600,6 +603,9 @@ void radix__early_init_mmu_secondary(void)
radix_init_amor();
}
radix_init_iamr();

if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}

void radix__mmu_cleanup_all(void)
@@ -622,22 +628,11 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
* physical on those processors
*/
BUG_ON(first_memblock_base != 0);

/*
* We limit the allocations that depend on ppc64_rma_size
* to first_memblock_size. We also clamp it to 1GB to
* avoid some funky things such as RTAS bugs.
*
* On radix config we really don't have a limitation
* on real mode access. But keeping it as above works
* well enough.
* Radix mode is not limited by RMA / VRMA addressing.
*/
ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
/*
* Finally limit subsequent allocations. We really don't want
* to limit the memblock allocations to rma_size. FIXME!! should
* we even limit at all ?
*/
memblock_set_current_limit(first_memblock_base + first_memblock_size);
ppc64_rma_size = ULONG_MAX;
}

#ifdef CONFIG_MEMORY_HOTPLUG
@@ -54,7 +54,8 @@ static inline int pte_looks_normal(pte_t pte)
return 0;
#else
return (pte_val(pte) &
(_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
(_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER |
_PAGE_PRIVILEGED)) ==
(_PAGE_PRESENT | _PAGE_USER);
#endif
}
@@ -98,14 +98,7 @@ ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)

/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
flags &= ~(_PAGE_USER | _PAGE_EXEC);

#ifdef _PAGE_BAP_SR
/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
* which means that we just cleared supervisor access... oops ;-) This
* restores it
*/
flags |= _PAGE_BAP_SR;
#endif
flags |= _PAGE_PRIVILEGED;

return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}
@@ -244,20 +244,8 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
/*
* Force kernel mapping.
*/
#if defined(CONFIG_PPC_BOOK3S_64)
flags |= _PAGE_PRIVILEGED;
#else
flags &= ~_PAGE_USER;
#endif

#ifdef _PAGE_BAP_SR
/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
* which means that we just cleared supervisor access... oops ;-) This
* restores it
*/
flags |= _PAGE_BAP_SR;
#endif
flags |= _PAGE_PRIVILEGED;

if (ppc_md.ioremap)
return ppc_md.ioremap(addr, size, flags, caller);
468
arch/powerpc/mm/pkeys.c
Normal file
@@ -0,0 +1,468 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* PowerPC Memory Protection Keys management
*
* Copyright 2017, Ram Pai, IBM Corporation.
*/

#include <asm/mman.h>
#include <asm/setup.h>
#include <linux/pkeys.h>
#include <linux/of_device.h>

DEFINE_STATIC_KEY_TRUE(pkey_disabled);
bool pkey_execute_disable_supported;
int pkeys_total; /* Total pkeys as per device tree */
bool pkeys_devtree_defined; /* pkey property exported by device tree */
u32 initial_allocation_mask; /* Bits set for reserved keys */
u64 pkey_amr_uamor_mask; /* Bits in AMR/UAMOR not to be touched */
u64 pkey_iamr_mask; /* Bits in IAMR not to be touched */

#define AMR_BITS_PER_PKEY 2
#define AMR_RD_BIT 0x1UL
#define AMR_WR_BIT 0x2UL
#define IAMR_EX_BIT 0x1UL
#define PKEY_REG_BITS (sizeof(u64)*8)
#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))

static void scan_pkey_feature(void)
{
u32 vals[2];
struct device_node *cpu;

cpu = of_find_node_by_type(NULL, "cpu");
if (!cpu)
return;

if (of_property_read_u32_array(cpu,
"ibm,processor-storage-keys", vals, 2))
return;

/*
* Since any pkey can be used for data or execute, we will just treat
* all keys as equal and track them as one entity.
*/
pkeys_total = be32_to_cpu(vals[0]);
pkeys_devtree_defined = true;
}

static inline bool pkey_mmu_enabled(void)
{
if (firmware_has_feature(FW_FEATURE_LPAR))
return pkeys_total;
else
return cpu_has_feature(CPU_FTR_PKEY);
}

int pkey_initialize(void)
{
int os_reserved, i;

/*
* We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral
* generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE.
* Ensure that the bits are distinct.
*/
BUILD_BUG_ON(PKEY_DISABLE_EXECUTE &
(PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));

/*
* pkey_to_vmflag_bits() assumes that the pkey bits are contiguous
* in the vmaflag. Make sure that is really the case.
*/
BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) +
__builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)
!= (sizeof(u64) * BITS_PER_BYTE));

/* scan the device tree for pkey feature */
scan_pkey_feature();

/*
* Let's assume 32 pkeys on P8 bare metal, if it's not defined by the
* device tree. We make this exception since skiboot forgot to expose
* this property on power8.
*/
if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) &&
cpu_has_feature(CPU_FTRS_POWER8))
pkeys_total = 32;

/*
* Adjust the upper limit, based on the number of bits supported by
* arch-neutral code.
*/
pkeys_total = min_t(int, pkeys_total,
(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT));

if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total)
static_branch_enable(&pkey_disabled);
else
static_branch_disable(&pkey_disabled);

if (static_branch_likely(&pkey_disabled))
return 0;

/*
* The device tree cannot be relied on to indicate support for
* execute_disable. Instead we use a PVR check.
*/
if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p))
pkey_execute_disable_supported = false;
else
pkey_execute_disable_supported = true;

#ifdef CONFIG_PPC_4K_PAGES
/*
* The OS can manage only 8 pkeys due to its inability to represent them
* in the Linux 4K PTE.
*/
os_reserved = pkeys_total - 8;
#else
os_reserved = 0;
#endif
/*
* Bits are in LE format. NOTE: 1, 0 are reserved.
* key 0 is the default key, which allows read/write/execute.
* key 1 is recommended not to be used. PowerISA(3.0) page 1015,
* programming note.
*/
initial_allocation_mask = ~0x0;

/* register mask is in BE format */
pkey_amr_uamor_mask = ~0x0ul;
pkey_iamr_mask = ~0x0ul;

for (i = 2; i < (pkeys_total - os_reserved); i++) {
initial_allocation_mask &= ~(0x1 << i);
pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
pkey_iamr_mask &= ~(0x1ul << pkeyshift(i));
}
return 0;
}

arch_initcall(pkey_initialize);

void pkey_mm_init(struct mm_struct *mm)
{
if (static_branch_likely(&pkey_disabled))
return;
mm_pkey_allocation_map(mm) = initial_allocation_mask;
/* -1 means unallocated or invalid */
mm->context.execute_only_pkey = -1;
}

static inline u64 read_amr(void)
{
return mfspr(SPRN_AMR);
}

static inline void write_amr(u64 value)
{
mtspr(SPRN_AMR, value);
}

static inline u64 read_iamr(void)
{
if (!likely(pkey_execute_disable_supported))
return 0x0UL;

return mfspr(SPRN_IAMR);
}

static inline void write_iamr(u64 value)
{
if (!likely(pkey_execute_disable_supported))
return;

mtspr(SPRN_IAMR, value);
}

static inline u64 read_uamor(void)
{
return mfspr(SPRN_UAMOR);
}

static inline void write_uamor(u64 value)
{
mtspr(SPRN_UAMOR, value);
}

static bool is_pkey_enabled(int pkey)
{
u64 uamor = read_uamor();
u64 pkey_bits = 0x3ul << pkeyshift(pkey);
u64 uamor_pkey_bits = (uamor & pkey_bits);

/*
* Both the bits in UAMOR corresponding to the key should be set or
* reset.
*/
WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits));
return !!(uamor_pkey_bits);
}

static inline void init_amr(int pkey, u8 init_bits)
{
u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey));
u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey));

write_amr(old_amr | new_amr_bits);
}

static inline void init_iamr(int pkey, u8 init_bits)
{
u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey));
u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey));

write_iamr(old_iamr | new_iamr_bits);
}

static void pkey_status_change(int pkey, bool enable)
{
u64 old_uamor;

/* Reset the AMR and IAMR bits for this key */
init_amr(pkey, 0x0);
init_iamr(pkey, 0x0);

/* Enable/disable key */
old_uamor = read_uamor();
if (enable)
old_uamor |= (0x3ul << pkeyshift(pkey));
else
old_uamor &= ~(0x3ul << pkeyshift(pkey));
write_uamor(old_uamor);
}

void __arch_activate_pkey(int pkey)
{
pkey_status_change(pkey, true);
}

void __arch_deactivate_pkey(int pkey)
{
pkey_status_change(pkey, false);
}

/*
* Set the access rights in the AMR, IAMR and UAMOR registers for @pkey
* to those specified in @init_val.
*/
int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val)
{
u64 new_amr_bits = 0x0ul;
u64 new_iamr_bits = 0x0ul;

if (!is_pkey_enabled(pkey))
return -EINVAL;

if (init_val & PKEY_DISABLE_EXECUTE) {
if (!pkey_execute_disable_supported)
return -EINVAL;
new_iamr_bits |= IAMR_EX_BIT;
}
init_iamr(pkey, new_iamr_bits);

/* Set the bits we need in AMR: */
if (init_val & PKEY_DISABLE_ACCESS)
new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT;
else if (init_val & PKEY_DISABLE_WRITE)
new_amr_bits |= AMR_WR_BIT;

init_amr(pkey, new_amr_bits);
return 0;
}

void thread_pkey_regs_save(struct thread_struct *thread)
{
if (static_branch_likely(&pkey_disabled))
return;

/*
* TODO: Skip saving registers if @thread hasn't used any keys yet.
*/
thread->amr = read_amr();
thread->iamr = read_iamr();
thread->uamor = read_uamor();
}

void thread_pkey_regs_restore(struct thread_struct *new_thread,
struct thread_struct *old_thread)
{
if (static_branch_likely(&pkey_disabled))
return;

/*
* TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet.
*/
if (old_thread->amr != new_thread->amr)
write_amr(new_thread->amr);
if (old_thread->iamr != new_thread->iamr)
write_iamr(new_thread->iamr);
if (old_thread->uamor != new_thread->uamor)
write_uamor(new_thread->uamor);
}

void thread_pkey_regs_init(struct thread_struct *thread)
{
if (static_branch_likely(&pkey_disabled))
return;

write_amr(read_amr() & pkey_amr_uamor_mask);
write_iamr(read_iamr() & pkey_iamr_mask);
write_uamor(read_uamor() & pkey_amr_uamor_mask);
}

static inline bool pkey_allows_readwrite(int pkey)
{
int pkey_shift = pkeyshift(pkey);

if (!is_pkey_enabled(pkey))
return true;

return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift));
}

int __execute_only_pkey(struct mm_struct *mm)
{
bool need_to_set_mm_pkey = false;
int execute_only_pkey = mm->context.execute_only_pkey;
int ret;

/* Do we need to assign a pkey for mm's execute-only maps? */
if (execute_only_pkey == -1) {
/* Go allocate one to use, which might fail */
execute_only_pkey = mm_pkey_alloc(mm);
if (execute_only_pkey < 0)
return -1;
need_to_set_mm_pkey = true;
}

/*
* We do not want to go through the relatively costly dance to set AMR
* if we do not need to. Check it first and assume that if the
* execute-only pkey is readwrite-disabled then we do not have to set it
* ourselves.
*/
if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey))
return execute_only_pkey;

/*
* Set up AMR so that it denies access for everything other than
* execution.
*/
ret = __arch_set_user_pkey_access(current, execute_only_pkey,
PKEY_DISABLE_ACCESS |
PKEY_DISABLE_WRITE);
/*
* If the AMR-set operation failed somehow, just return 0 and
* effectively disable execute-only support.
*/
if (ret) {
mm_pkey_free(mm, execute_only_pkey);
return -1;
}

/* We got one, store it and use it from here on out */
if (need_to_set_mm_pkey)
mm->context.execute_only_pkey = execute_only_pkey;
return execute_only_pkey;
}

static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
{
/* Do this check first since the vm_flags should be hot */
if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC)
return false;

return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey);
}

/*
* This should only be called for *plain* mprotect calls.
*/
int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot,
int pkey)
{
/*
* If the currently associated pkey is execute-only, but the requested
* protection requires read or write, move it back to the default pkey.
*/
if (vma_is_pkey_exec_only(vma) && (prot & (PROT_READ | PROT_WRITE)))
return 0;

/*
* The requested protection is execute-only. Hence let's use an
* execute-only pkey.
*/
if (prot == PROT_EXEC) {
pkey = execute_only_pkey(vma->vm_mm);
if (pkey > 0)
return pkey;
}

/* Nothing to override. */
return vma_pkey(vma);
}

static bool pkey_access_permitted(int pkey, bool write, bool execute)
{
int pkey_shift;
u64 amr;

if (!pkey)
return true;

if (!is_pkey_enabled(pkey))
return true;

pkey_shift = pkeyshift(pkey);
if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift)))
return true;

amr = read_amr(); /* Delay reading amr until absolutely needed */
return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) ||
(write && !(amr & (AMR_WR_BIT << pkey_shift))));
}

bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
{
if (static_branch_likely(&pkey_disabled))
return true;

return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute);
}

/*
* We only want to enforce protection keys on the current thread because we
* effectively have no access to AMR/IAMR for other threads or any way to tell
* which AMR/IAMR in a threaded process we could use.
*
* So do not enforce things if the VMA is not from the current mm, or if we are
* in a kernel thread.
*/
static inline bool vma_is_foreign(struct vm_area_struct *vma)
{
if (!current->mm)
return true;

/* if it is not our ->mm, it has to be foreign */
if (current->mm != vma->vm_mm)
return true;

return false;
}

bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
bool execute, bool foreign)
{
if (static_branch_likely(&pkey_disabled))
return true;
/*
* Do not enforce our key-permissions on a foreign vma.
*/
if (foreign || vma_is_foreign(vma))
return true;

return pkey_access_permitted(vma_pkey(vma), write, execute);
}
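Illustration (not part of the patch): the pkeyshift() layout used throughout pkeys.c above places key 0 in the two most significant AMR bits, key 1 in the next two, and so on. A standalone sketch that only reuses the constants defined in the file; the key numbers are arbitrary:

#include <stdio.h>
#include <stdint.h>

#define AMR_BITS_PER_PKEY 2
#define AMR_RD_BIT 0x1UL
#define AMR_WR_BIT 0x2UL
#define PKEY_REG_BITS (sizeof(uint64_t) * 8)
#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))

int main(void)
{
        /* Key 0 owns the two most significant AMR bits, key 1 the next two, ... */
        for (int pkey = 0; pkey < 4; pkey++) {
                unsigned int shift = pkeyshift(pkey);
                uint64_t rd = AMR_RD_BIT << shift;
                uint64_t wr = AMR_WR_BIT << shift;
                printf("pkey %d: shift %2u, read bit 0x%016llx, write bit 0x%016llx\n",
                       pkey, shift,
                       (unsigned long long)rd, (unsigned long long)wr);
        }
        return 0;
}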
@@ -195,6 +195,9 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
unsigned long next, limit;
int err;

if (radix_enabled())
return -ENOENT;

/* Check parameters */
if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
addr >= mm->task_size || len >= mm->task_size ||
@@ -23,6 +23,72 @@
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
* tlbiel instruction for radix, set invalidation
* i.e., r=1 and is=01 or is=10 or is=11
*/
static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
unsigned int pid,
unsigned int ric, unsigned int prs)
{
unsigned long rb;
unsigned long rs;
unsigned int r = 1; /* radix format */

rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
: : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
: "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
unsigned int set;

asm volatile("ptesync": : :"memory");

/*
* Flush the first set of the TLB, and the entire Page Walk Cache
* and partition table entries. Then flush the remaining sets of the
* TLB.
*/
tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
for (set = 1; set < num_sets; set++)
tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);

/* Do the same for process scoped entries. */
tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
for (set = 1; set < num_sets; set++)
tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
unsigned int is;

switch (action) {
case TLB_INVAL_SCOPE_GLOBAL:
is = 3;
break;
case TLB_INVAL_SCOPE_LPID:
is = 2;
break;
default:
BUG();
}

if (early_cpu_has_feature(CPU_FTR_ARCH_300))
tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
else
WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

static inline void __tlbiel_pid(unsigned long pid, int set,
unsigned long ric)
{
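Illustration (not part of the patch): the rb/rs encoding in tlbiel_radix_set_isa300() places the set and is fields using IBM bit numbering, so bit 51 of a 64-bit word is shift 63 - 51 = 12 (assumption: PPC_BITLSHIFT(be) expands to 63 - be on 64-bit). A standalone sketch with arbitrary set/is/pid values:

#include <stdio.h>
#include <stdint.h>

/* IBM bit numbering: bit 0 is the MSB, so bit `be` of a 64-bit word
 * is shifted left by 63 - be (assumed equivalent to PPC_BITLSHIFT). */
#define BITLSHIFT(be) (63 - (be))

int main(void)
{
        unsigned int set = 1, is = 3, pid = 0;

        uint64_t rb = ((uint64_t)set << BITLSHIFT(51)) |
                      ((uint64_t)is << BITLSHIFT(53));
        uint64_t rs = (uint64_t)pid << BITLSHIFT(31);

        printf("set=%u is=%u pid=%u -> rb=0x%016llx, rs=0x%016llx\n",
               set, is, pid,
               (unsigned long long)rb, (unsigned long long)rs);
        return 0;
}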
@@ -600,14 +666,12 @@ void radix__flush_tlb_all(void)
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
trace_tlbie(0, 0, rb, rs, ric, prs, r);
/*
* now flush host entries by passing PRS = 0 and LPID == 0
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
trace_tlbie(0, 0, rb, 0, ric, prs, r);
}

void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
@@ -388,7 +388,10 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)

{
flush_tlb_mm(vma->vm_mm);
if (end - start == PAGE_SIZE && !(start & ~PAGE_MASK))
flush_tlb_page(vma, start);
else
flush_tlb_mm(vma->vm_mm);
}
EXPORT_SYMBOL(flush_tlb_range);
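Illustration (not part of the patch): the new fast path in flush_tlb_range() fires only for an exactly page-sized, page-aligned range. A standalone check of that predicate, assuming 4 KiB pages for the sake of the example:

#include <stdio.h>

#define PAGE_SIZE 0x1000UL              /* assumption: 4 KiB pages */
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* Mirrors the condition used to pick flush_tlb_page() over flush_tlb_mm(). */
static int is_single_page(unsigned long start, unsigned long end)
{
        return end - start == PAGE_SIZE && !(start & ~PAGE_MASK);
}

int main(void)
{
        printf("%d\n", is_single_page(0x2000, 0x3000));  /* 1: one aligned page */
        printf("%d\n", is_single_page(0x2800, 0x3800));  /* 0: not page aligned */
        printf("%d\n", is_single_page(0x2000, 0x4000));  /* 0: two pages */
        return 0;
}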