Merge tag 'powerpc-4.21-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:
 "Notable changes:

   - Mitigations for Spectre v2 on some Freescale (NXP) CPUs.

   - A large series adding support for pass-through of Nvidia V100 GPUs
     to guests on Power9.

   - Another large series to enable hardware assistance for TLB table
     walk on MPC8xx CPUs.

   - Some preparatory changes to our DMA code, to make way for further
     cleanups from Christoph.

   - Several fixes for our Transactional Memory handling discovered by
     fuzzing the signal return path.

   - Support for generating our system call table(s) from a text file
     like other architectures.

   - A fix to our page fault handler so that instead of generating a
     WARN_ON_ONCE, user accesses of kernel addresses instead print a
     ratelimited and appropriately scary warning.

   - A cosmetic change to make our unhandled page fault messages more
     similar to other arches and also more compact and informative.

   - Freescale updates from Scott:
       "Highlights include elimination of legacy clock bindings use from
        dts files, an 83xx watchdog handler, fixes to old dts interrupt
        errors, and some minor cleanup."

  And many clean-ups, reworks and minor fixes etc.

  Thanks to: Alexandre Belloni, Alexey Kardashevskiy, Andrew Donnellan,
  Aneesh Kumar K.V, Arnd Bergmann, Benjamin Herrenschmidt, Breno Leitao,
  Christian Lamparter, Christophe Leroy, Christoph Hellwig, Daniel
  Axtens, Darren Stevens, David Gibson, Diana Craciun, Dmitry V. Levin,
  Firoz Khan, Geert Uytterhoeven, Greg Kurz, Gustavo Romero, Hari
  Bathini, Joel Stanley, Kees Cook, Madhavan Srinivasan, Mahesh
  Salgaonkar, Markus Elfring, Mathieu Malaterre, Michal Suchánek, Naveen
  N. Rao, Nick Desaulniers, Oliver O'Halloran, Paul Mackerras, Ram Pai,
  Ravi Bangoria, Rob Herring, Russell Currey, Sabyasachi Gupta, Sam
  Bobroff, Satheesh Rajendran, Scott Wood, Segher Boessenkool, Stephen
  Rothwell, Tang Yuantian, Thiago Jung Bauermann, Yangtao Li, Yuantian
  Tang, Yue Haibing"

* tag 'powerpc-4.21-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (201 commits)
  Revert "powerpc/fsl_pci: simplify fsl_pci_dma_set_mask"
  powerpc/zImage: Also check for stdout-path
  powerpc: Fix HMIs on big-endian with CONFIG_RELOCATABLE=y
  macintosh: Use of_node_name_{eq, prefix} for node name comparisons
  ide: Use of_node_name_eq for node name comparisons
  powerpc: Use of_node_name_eq for node name comparisons
  powerpc/pseries/pmem: Convert to %pOFn instead of device_node.name
  powerpc/mm: Remove very old comment in hash-4k.h
  powerpc/pseries: Fix node leak in update_lmb_associativity_index()
  powerpc/configs/85xx: Enable CONFIG_DEBUG_KERNEL
  powerpc/dts/fsl: Fix dtc-flagged interrupt errors
  clk: qoriq: add more compatibles strings
  powerpc/fsl: Use new clockgen binding
  powerpc/83xx: handle machine check caused by watchdog timer
  powerpc/fsl-rio: fix spelling mistake "reserverd" -> "reserved"
  powerpc/fsl_pci: simplify fsl_pci_dma_set_mask
  arch/powerpc/fsl_rmu: Use dma_zalloc_coherent
  vfio_pci: Add NVIDIA GV100GL [Tesla V100 SXM2] subdriver
  vfio_pci: Allow regions to add own capabilities
  vfio_pci: Allow mapping extra regions
  ...
This commit is contained in:
Linus Torvalds
2018-12-27 10:43:24 -08:00
296 changed files with 4698 additions and 3420 deletions

View File

@@ -29,6 +29,7 @@
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include "mmu_decl.h"
@@ -43,22 +44,13 @@ unsigned long tlb_47x_boltmap[1024/8];
static void ppc44x_update_tlb_hwater(void)
{
extern unsigned int tlb_44x_patch_hwater_D[];
extern unsigned int tlb_44x_patch_hwater_I[];
/* The TLB miss handlers hard codes the watermark in a cmpli
* instruction to improve performances rather than loading it
* from the global variable. Thus, we patch the instructions
* in the 2 TLB miss handlers when updating the value
*/
tlb_44x_patch_hwater_D[0] = (tlb_44x_patch_hwater_D[0] & 0xffff0000) |
tlb_44x_hwater;
flush_icache_range((unsigned long)&tlb_44x_patch_hwater_D[0],
(unsigned long)&tlb_44x_patch_hwater_D[1]);
tlb_44x_patch_hwater_I[0] = (tlb_44x_patch_hwater_I[0] & 0xffff0000) |
tlb_44x_hwater;
flush_icache_range((unsigned long)&tlb_44x_patch_hwater_I[0],
(unsigned long)&tlb_44x_patch_hwater_I[1]);
modify_instruction_site(&patch__tlb_44x_hwater_D, 0xffff, tlb_44x_hwater);
modify_instruction_site(&patch__tlb_44x_hwater_I, 0xffff, tlb_44x_hwater);
}
/*

View File

@@ -100,11 +100,7 @@ static void __init mmu_mapin_immr(void)
static void __init mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
{
unsigned int instr = *(unsigned int *)patch_site_addr(site);
instr &= 0xffff0000;
instr |= (unsigned long)__va(mapped) >> 16;
patch_instruction_site(site, instr);
modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16);
}
unsigned long __init mmu_mapin_ram(unsigned long top)
@@ -175,12 +171,12 @@ void set_context(unsigned long id, pgd_t *pgd)
*(ptr + 1) = pgd;
#endif
/* Register M_TW will contain base address of level 1 table minus the
/* Register M_TWB will contain base address of level 1 table minus the
* lower part of the kernel PGDIR base address, so that all accesses to
* level 1 table are done relative to lower part of kernel PGDIR base
* address.
*/
mtspr(SPRN_M_TW, __pa(pgd) - offset);
mtspr(SPRN_M_TWB, __pa(pgd) - offset);
/* Update context */
mtspr(SPRN_M_CASID, id - 1);

View File

@@ -15,10 +15,13 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o \
$(hash64-y) mmu_context_book3s64.o \
pgtable-book3s64.o pgtable-frag.o
obj-$(CONFIG_PPC32) += pgtable-frag.o
obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
obj-$(CONFIG_PPC_BOOK3S_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
obj-$(CONFIG_PPC_BOOK3S) += tlb_hash$(BITS).o
ifdef CONFIG_PPC_BOOK3S_64
obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
@@ -47,7 +50,7 @@ ifdef CONFIG_PPC_PTDUMP
obj-$(CONFIG_4xx) += dump_linuxpagetables-generic.o
obj-$(CONFIG_PPC_8xx) += dump_linuxpagetables-8xx.o
obj-$(CONFIG_PPC_BOOK3E_MMU) += dump_linuxpagetables-generic.o
obj-$(CONFIG_PPC_BOOK3S_32) += dump_linuxpagetables-generic.o
obj-$(CONFIG_PPC_BOOK3S_32) += dump_linuxpagetables-generic.o dump_bats.o dump_sr.o
obj-$(CONFIG_PPC_BOOK3S_64) += dump_linuxpagetables-book3s64.o
endif
obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o

View File

@@ -29,7 +29,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/dma-mapping.h>
#include <linux/dma-direct.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
@@ -151,8 +151,8 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi
* Allocate DMA-coherent memory space and return both the kernel remapped
* virtual and bus address for that space.
*/
void *
__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
struct page *page;
struct ppc_vm_region *c;
@@ -223,7 +223,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
/*
* Set the "dma handle"
*/
*handle = page_to_phys(page);
*dma_handle = phys_to_dma(dev, page_to_phys(page));
do {
SetPageReserved(page);
@@ -249,12 +249,12 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
no_page:
return NULL;
}
EXPORT_SYMBOL(__dma_alloc_coherent);
/*
* free a page as defined by the above mapping.
*/
void __dma_free_coherent(size_t size, void *vaddr)
void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
struct ppc_vm_region *c;
unsigned long flags, addr;
@@ -309,7 +309,6 @@ void __dma_free_coherent(size_t size, void *vaddr)
__func__, vaddr);
dump_stack();
}
EXPORT_SYMBOL(__dma_free_coherent);
/*
* make an area consistent.
@@ -401,7 +400,7 @@ EXPORT_SYMBOL(__dma_sync_page);
/*
* Return the PFN for a given cpu virtual address returned by
* __dma_alloc_coherent. This is used by dma_mmap_coherent()
* __dma_nommu_alloc_coherent. This is used by dma_mmap_coherent()
*/
unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
{

173
arch/powerpc/mm/dump_bats.c Normal file
View File

@@ -0,0 +1,173 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright 2018, Christophe Leroy CS S.I.
* <christophe.leroy@c-s.fr>
*
* This dumps the content of BATS
*/
#include <asm/debugfs.h>
#include <asm/pgtable.h>
#include <asm/cpu_has_feature.h>
static char *pp_601(int k, int pp)
{
if (pp == 0)
return k ? "NA" : "RWX";
if (pp == 1)
return k ? "ROX" : "RWX";
if (pp == 2)
return k ? "RWX" : "RWX";
return k ? "ROX" : "ROX";
}
static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper)
{
u32 blpi = upper & 0xfffe0000;
u32 k = (upper >> 2) & 3;
u32 pp = upper & 3;
phys_addr_t pbn = PHYS_BAT_ADDR(lower);
u32 bsm = lower & 0x3ff;
u32 size = (bsm + 1) << 17;
seq_printf(m, "%d: ", idx);
if (!(lower & 0x40)) {
seq_puts(m, " -\n");
return;
}
seq_printf(m, "0x%08x-0x%08x ", blpi, blpi + size - 1);
#ifdef CONFIG_PHYS_64BIT
seq_printf(m, "0x%016llx ", pbn);
#else
seq_printf(m, "0x%08x ", pbn);
#endif
seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp));
if (lower & _PAGE_WRITETHRU)
seq_puts(m, "write through ");
if (lower & _PAGE_NO_CACHE)
seq_puts(m, "no cache ");
if (lower & _PAGE_COHERENT)
seq_puts(m, "coherent ");
seq_puts(m, "\n");
}
#define BAT_SHOW_601(_m, _n, _l, _u) bat_show_601(_m, _n, mfspr(_l), mfspr(_u))
static int bats_show_601(struct seq_file *m, void *v)
{
seq_puts(m, "---[ Block Address Translation ]---\n");
BAT_SHOW_601(m, 0, SPRN_IBAT0L, SPRN_IBAT0U);
BAT_SHOW_601(m, 1, SPRN_IBAT1L, SPRN_IBAT1U);
BAT_SHOW_601(m, 2, SPRN_IBAT2L, SPRN_IBAT2U);
BAT_SHOW_601(m, 3, SPRN_IBAT3L, SPRN_IBAT3U);
return 0;
}
static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool is_d)
{
u32 bepi = upper & 0xfffe0000;
u32 bl = (upper >> 2) & 0x7ff;
u32 k = upper & 3;
phys_addr_t brpn = PHYS_BAT_ADDR(lower);
u32 size = (bl + 1) << 17;
seq_printf(m, "%d: ", idx);
if (k == 0) {
seq_puts(m, " -\n");
return;
}
seq_printf(m, "0x%08x-0x%08x ", bepi, bepi + size - 1);
#ifdef CONFIG_PHYS_64BIT
seq_printf(m, "0x%016llx ", brpn);
#else
seq_printf(m, "0x%08x ", brpn);
#endif
if (k == 1)
seq_puts(m, "User ");
else if (k == 2)
seq_puts(m, "Kernel ");
else
seq_puts(m, "Kernel/User ");
if (lower & BPP_RX)
seq_puts(m, is_d ? "RO " : "EXEC ");
else if (lower & BPP_RW)
seq_puts(m, is_d ? "RW " : "EXEC ");
else
seq_puts(m, is_d ? "NA " : "NX ");
if (lower & _PAGE_WRITETHRU)
seq_puts(m, "write through ");
if (lower & _PAGE_NO_CACHE)
seq_puts(m, "no cache ");
if (lower & _PAGE_COHERENT)
seq_puts(m, "coherent ");
if (lower & _PAGE_GUARDED)
seq_puts(m, "guarded ");
seq_puts(m, "\n");
}
#define BAT_SHOW_603(_m, _n, _l, _u, _d) bat_show_603(_m, _n, mfspr(_l), mfspr(_u), _d)
static int bats_show_603(struct seq_file *m, void *v)
{
seq_puts(m, "---[ Instruction Block Address Translation ]---\n");
BAT_SHOW_603(m, 0, SPRN_IBAT0L, SPRN_IBAT0U, false);
BAT_SHOW_603(m, 1, SPRN_IBAT1L, SPRN_IBAT1U, false);
BAT_SHOW_603(m, 2, SPRN_IBAT2L, SPRN_IBAT2U, false);
BAT_SHOW_603(m, 3, SPRN_IBAT3L, SPRN_IBAT3U, false);
if (mmu_has_feature(MMU_FTR_USE_HIGH_BATS)) {
BAT_SHOW_603(m, 4, SPRN_IBAT4L, SPRN_IBAT4U, false);
BAT_SHOW_603(m, 5, SPRN_IBAT5L, SPRN_IBAT5U, false);
BAT_SHOW_603(m, 6, SPRN_IBAT6L, SPRN_IBAT6U, false);
BAT_SHOW_603(m, 7, SPRN_IBAT7L, SPRN_IBAT7U, false);
}
seq_puts(m, "\n---[ Data Block Address Translation ]---\n");
BAT_SHOW_603(m, 0, SPRN_DBAT0L, SPRN_DBAT0U, true);
BAT_SHOW_603(m, 1, SPRN_DBAT1L, SPRN_DBAT1U, true);
BAT_SHOW_603(m, 2, SPRN_DBAT2L, SPRN_DBAT2U, true);
BAT_SHOW_603(m, 3, SPRN_DBAT3L, SPRN_DBAT3U, true);
if (mmu_has_feature(MMU_FTR_USE_HIGH_BATS)) {
BAT_SHOW_603(m, 4, SPRN_DBAT4L, SPRN_DBAT4U, true);
BAT_SHOW_603(m, 5, SPRN_DBAT5L, SPRN_DBAT5U, true);
BAT_SHOW_603(m, 6, SPRN_DBAT6L, SPRN_DBAT6U, true);
BAT_SHOW_603(m, 7, SPRN_DBAT7L, SPRN_DBAT7U, true);
}
return 0;
}
static int bats_open(struct inode *inode, struct file *file)
{
if (cpu_has_feature(CPU_FTR_601))
return single_open(file, bats_show_601, NULL);
return single_open(file, bats_show_603, NULL);
}
static const struct file_operations bats_fops = {
.open = bats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __init bats_init(void)
{
struct dentry *debugfs_file;
debugfs_file = debugfs_create_file("block_address_translation", 0400,
powerpc_debugfs_root, NULL, &bats_fops);
return debugfs_file ? 0 : -ENOMEM;
}
device_initcall(bats_init);

View File

@@ -21,13 +21,11 @@ static const struct flag_info flag_array[] = {
.set = "rw",
.clear = "r ",
}, {
#ifndef CONFIG_PPC_BOOK3S_32
.mask = _PAGE_EXEC,
.val = _PAGE_EXEC,
.set = " X ",
.clear = " ",
}, {
#endif
.mask = _PAGE_PRESENT,
.val = _PAGE_PRESENT,
.set = "present",

64
arch/powerpc/mm/dump_sr.c Normal file
View File

@@ -0,0 +1,64 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright 2018, Christophe Leroy CS S.I.
* <christophe.leroy@c-s.fr>
*
* This dumps the content of Segment Registers
*/
#include <asm/debugfs.h>
static void seg_show(struct seq_file *m, int i)
{
u32 val = mfsrin(i << 28);
seq_printf(m, "0x%01x0000000-0x%01xfffffff ", i, i);
seq_printf(m, "Kern key %d ", (val >> 30) & 1);
seq_printf(m, "User key %d ", (val >> 29) & 1);
if (val & 0x80000000) {
seq_printf(m, "Device 0x%03x", (val >> 20) & 0x1ff);
seq_printf(m, "-0x%05x", val & 0xfffff);
} else {
if (val & 0x10000000)
seq_puts(m, "No Exec ");
seq_printf(m, "VSID 0x%06x", val & 0xffffff);
}
seq_puts(m, "\n");
}
static int sr_show(struct seq_file *m, void *v)
{
int i;
seq_puts(m, "---[ User Segments ]---\n");
for (i = 0; i < TASK_SIZE >> 28; i++)
seg_show(m, i);
seq_puts(m, "\n---[ Kernel Segments ]---\n");
for (; i < 16; i++)
seg_show(m, i);
return 0;
}
static int sr_open(struct inode *inode, struct file *file)
{
return single_open(file, sr_show, NULL);
}
static const struct file_operations sr_fops = {
.open = sr_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __init sr_init(void)
{
struct dentry *debugfs_file;
debugfs_file = debugfs_create_file("segment_registers", 0400,
powerpc_debugfs_root, NULL, &sr_fops);
return debugfs_file ? 0 : -ENOMEM;
}
device_initcall(sr_init);

View File

@@ -226,7 +226,9 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
unsigned long address)
{
if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT))) {
/* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
DSISR_PROTFAULT))) {
printk_ratelimited(KERN_CRIT "kernel tried to execute"
" exec-protected page (%lx) -"
"exploit attempt? (uid: %d)\n",
@@ -341,9 +343,20 @@ static inline void cmo_account_page_fault(void)
static inline void cmo_account_page_fault(void) { }
#endif /* CONFIG_PPC_SMLPAR */
#ifdef CONFIG_PPC_STD_MMU
static void sanity_check_fault(bool is_write, unsigned long error_code)
#ifdef CONFIG_PPC_BOOK3S
static void sanity_check_fault(bool is_write, bool is_user,
unsigned long error_code, unsigned long address)
{
/*
* Userspace trying to access kernel address, we get PROTFAULT for that.
*/
if (is_user && address >= TASK_SIZE) {
pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n",
current->comm, current->pid, address,
from_kuid(&init_user_ns, current_uid()));
return;
}
/*
* For hash translation mode, we should never get a
* PROTFAULT. Any update to pte to reduce access will result in us
@@ -373,12 +386,15 @@ static void sanity_check_fault(bool is_write, unsigned long error_code)
* For radix, we can get prot fault for autonuma case, because radix
* page table will have them marked noaccess for user.
*/
if (!radix_enabled() && !is_write)
WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
if (radix_enabled() || is_write)
return;
WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
}
#else
static void sanity_check_fault(bool is_write, unsigned long error_code) { }
#endif /* CONFIG_PPC_STD_MMU */
static void sanity_check_fault(bool is_write, bool is_user,
unsigned long error_code, unsigned long address) { }
#endif /* CONFIG_PPC_BOOK3S */
/*
* Define the correct "is_write" bit in error_code based
@@ -435,7 +451,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
}
/* Additional sanity check(s) */
sanity_check_fault(is_write, error_code);
sanity_check_fault(is_write, is_user, error_code, address);
/*
* The kernel should never take an execute fault nor should it
@@ -637,21 +653,22 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
case 0x300:
case 0x380:
case 0xe00:
printk(KERN_ALERT "Unable to handle kernel paging request for "
"data at address 0x%08lx\n", regs->dar);
pr_alert("BUG: %s at 0x%08lx\n",
regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
"Unable to handle kernel data access", regs->dar);
break;
case 0x400:
case 0x480:
printk(KERN_ALERT "Unable to handle kernel paging request for "
"instruction fetch\n");
pr_alert("BUG: Unable to handle kernel instruction fetch%s",
regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n");
break;
case 0x600:
printk(KERN_ALERT "Unable to handle kernel paging request for "
"unaligned access at address 0x%08lx\n", regs->dar);
pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n",
regs->dar);
break;
default:
printk(KERN_ALERT "Unable to handle kernel paging request for "
"unknown fault\n");
pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n",
regs->dar);
break;
}
printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",

View File

@@ -28,6 +28,7 @@
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/feature-fixups.h>
#include <asm/code-patching-asm.h>
#ifdef CONFIG_SMP
.section .bss
@@ -337,11 +338,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */
SET_V(r5) /* set V (valid) bit */
patch_site 0f, patch__hash_page_A0
patch_site 1f, patch__hash_page_A1
patch_site 2f, patch__hash_page_A2
/* Get the address of the primary PTE group in the hash table (r3) */
_GLOBAL(hash_page_patch_A)
addis r0,r7,Hash_base@h /* base address of hash table */
rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
0: addis r0,r7,Hash_base@h /* base address of hash table */
1: rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
2: rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
xor r3,r3,r0 /* make primary hash */
li r0,8 /* PTEs/group */
@@ -366,10 +369,10 @@ _GLOBAL(hash_page_patch_A)
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
beq+ found_slot
patch_site 0f, patch__hash_page_B
/* Search the secondary PTEG for a matching PTE */
ori r5,r5,PTE_H /* set H (secondary hash) bit */
_GLOBAL(hash_page_patch_B)
xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
0: xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
xori r4,r4,(-PTEG_SIZE & 0xffff)
addi r4,r4,-HPTE_SIZE
mtctr r0
@@ -393,10 +396,10 @@ _GLOBAL(hash_page_patch_B)
addi r6,r6,1
stw r6,primary_pteg_full@l(r4)
patch_site 0f, patch__hash_page_C
/* Search the secondary PTEG for an empty slot */
ori r5,r5,PTE_H /* set H (secondary hash) bit */
_GLOBAL(hash_page_patch_C)
xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
0: xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
xori r4,r4,(-PTEG_SIZE & 0xffff)
addi r4,r4,-HPTE_SIZE
mtctr r0
@@ -577,11 +580,13 @@ _GLOBAL(flush_hash_pages)
stwcx. r8,0,r5 /* update the pte */
bne- 33b
patch_site 0f, patch__flush_hash_A0
patch_site 1f, patch__flush_hash_A1
patch_site 2f, patch__flush_hash_A2
/* Get the address of the primary PTE group in the hash table (r3) */
_GLOBAL(flush_hash_patch_A)
addis r8,r7,Hash_base@h /* base address of hash table */
rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
0: addis r8,r7,Hash_base@h /* base address of hash table */
1: rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
2: rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
xor r8,r0,r8 /* make primary hash */
/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
@@ -593,11 +598,11 @@ _GLOBAL(flush_hash_patch_A)
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
beq+ 3f
patch_site 0f, patch__flush_hash_B
/* Search the secondary PTEG for a matching PTE */
ori r11,r11,PTE_H /* set H (secondary hash) bit */
li r0,8 /* PTEs/group */
_GLOBAL(flush_hash_patch_B)
xoris r12,r8,Hash_msk>>16 /* compute secondary hash */
0: xoris r12,r8,Hash_msk>>16 /* compute secondary hash */
xori r12,r12,(-PTEG_SIZE & 0xffff)
addi r12,r12,-HPTE_SIZE
mtctr r0

View File

@@ -42,6 +42,8 @@ EXPORT_SYMBOL(HPAGE_SHIFT);
#define hugepd_none(hpd) (hpd_val(hpd) == 0)
#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *)))
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
/*
@@ -61,14 +63,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
int num_hugepd;
if (pshift >= pdshift) {
cachep = hugepte_cache;
cachep = PGT_CACHE(PTE_T_ORDER);
num_hugepd = 1 << (pshift - pdshift);
} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
cachep = PGT_CACHE(PTE_INDEX_SIZE);
num_hugepd = 1;
} else {
cachep = PGT_CACHE(pdshift - pshift);
num_hugepd = 1;
}
new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -264,7 +269,7 @@ static void hugepd_free_rcu_callback(struct rcu_head *head)
unsigned int i;
for (i = 0; i < batch->index; i++)
kmem_cache_free(hugepte_cache, batch->ptes[i]);
kmem_cache_free(PGT_CACHE(PTE_T_ORDER), batch->ptes[i]);
free_page((unsigned long)batch);
}
@@ -277,7 +282,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
if (atomic_read(&tlb->mm->mm_users) < 2 ||
mm_is_thread_local(tlb->mm)) {
kmem_cache_free(hugepte_cache, hugepte);
kmem_cache_free(PGT_CACHE(PTE_T_ORDER), hugepte);
put_cpu_var(hugepd_freelist_cur);
return;
}
@@ -329,6 +334,9 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
if (shift >= pdshift)
hugepd_free(tlb, hugepte);
else if (IS_ENABLED(CONFIG_PPC_8xx))
pgtable_free_tlb(tlb, hugepte,
get_hugepd_cache_index(PTE_INDEX_SIZE));
else
pgtable_free_tlb(tlb, hugepte,
get_hugepd_cache_index(pdshift - shift));
@@ -652,7 +660,6 @@ static int __init hugepage_setup_sz(char *str)
}
__setup("hugepagesz=", hugepage_setup_sz);
struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void)
{
int psize;
@@ -699,24 +706,13 @@ static int __init hugetlbpage_init(void)
* if we have pdshift and shift value same, we don't
* use pgt cache for hugepd.
*/
if (pdshift > shift)
pgtable_cache_add(pdshift - shift, NULL);
if (pdshift > shift && IS_ENABLED(CONFIG_PPC_8xx))
pgtable_cache_add(PTE_INDEX_SIZE);
else if (pdshift > shift)
pgtable_cache_add(pdshift - shift);
#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
else if (!hugepte_cache) {
/*
* Create a kmem cache for hugeptes. The bottom bits in
* the pte have size information encoded in them, so
* align them to allow this
*/
hugepte_cache = kmem_cache_create("hugepte-cache",
sizeof(pte_t),
HUGEPD_SHIFT_MASK + 1,
0, NULL);
if (hugepte_cache == NULL)
panic("%s: Unable to create kmem cache "
"for hugeptes\n", __func__);
}
else
pgtable_cache_add(PTE_T_ORDER);
#endif
}

View File

@@ -25,22 +25,40 @@
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
static void pgd_ctor(void *addr)
{
memset(addr, 0, PGD_TABLE_SIZE);
#define CTOR(shift) static void ctor_##shift(void *addr) \
{ \
memset(addr, 0, sizeof(void *) << (shift)); \
}
static void pud_ctor(void *addr)
CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7);
CTOR(8); CTOR(9); CTOR(10); CTOR(11); CTOR(12); CTOR(13); CTOR(14); CTOR(15);
static inline void (*ctor(int shift))(void *)
{
memset(addr, 0, PUD_TABLE_SIZE);
BUILD_BUG_ON(MAX_PGTABLE_INDEX_SIZE != 15);
switch (shift) {
case 0: return ctor_0;
case 1: return ctor_1;
case 2: return ctor_2;
case 3: return ctor_3;
case 4: return ctor_4;
case 5: return ctor_5;
case 6: return ctor_6;
case 7: return ctor_7;
case 8: return ctor_8;
case 9: return ctor_9;
case 10: return ctor_10;
case 11: return ctor_11;
case 12: return ctor_12;
case 13: return ctor_13;
case 14: return ctor_14;
case 15: return ctor_15;
}
return NULL;
}
static void pmd_ctor(void *addr)
{
memset(addr, 0, PMD_TABLE_SIZE);
}
struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE + 1];
EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */
/*
@@ -50,7 +68,7 @@ EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */
* everything else. Caches created by this function are used for all
* the higher level pagetables, and for hugepage pagetables.
*/
void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
void pgtable_cache_add(unsigned int shift)
{
char *name;
unsigned long table_size = sizeof(void *) << shift;
@@ -71,19 +89,19 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
* moment, gcc doesn't seem to recognize is_power_of_2 as a
* constant expression, so so much for that. */
BUG_ON(!is_power_of_2(minalign));
BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
if (PGT_CACHE(shift))
return; /* Already have a cache of this size */
align = max_t(unsigned long, align, minalign);
name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
new = kmem_cache_create(name, table_size, align, 0, ctor);
new = kmem_cache_create(name, table_size, align, 0, ctor(shift));
if (!new)
panic("Could not allocate pgtable cache for order %d", shift);
kfree(name);
pgtable_cache[shift - 1] = new;
pgtable_cache[shift] = new;
pr_debug("Allocated pgtable cache for order %d\n", shift);
}
@@ -91,15 +109,15 @@ EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */
void pgtable_cache_init(void)
{
pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
pgtable_cache_add(PGD_INDEX_SIZE);
if (PMD_CACHE_INDEX && !PGT_CACHE(PMD_CACHE_INDEX))
pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
if (PMD_CACHE_INDEX)
pgtable_cache_add(PMD_CACHE_INDEX);
/*
* In all current configs, when the PUD index exists it's the
* same size as either the pgd or pmd index except with THP enabled
* on book3s 64
*/
if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
if (PUD_CACHE_INDEX)
pgtable_cache_add(PUD_CACHE_INDEX);
}

View File

@@ -246,35 +246,19 @@ static int __init mark_nonram_nosave(void)
}
#endif
static bool zone_limits_final;
/*
* The memory zones past TOP_ZONE are managed by generic mm code.
* These should be set to zero since that's what every other
* architecture does.
* Zones usage:
*
* We setup ZONE_DMA to be 31-bits on all platforms and ZONE_NORMAL to be
* everything else. GFP_DMA32 page allocations automatically fall back to
* ZONE_DMA.
*
* By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
* inform the generic DMA mapping code. 32-bit only devices (if not handled
* by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
* otherwise served by ZONE_DMA.
*/
static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
[0 ... TOP_ZONE ] = ~0UL,
[TOP_ZONE + 1 ... MAX_NR_ZONES - 1] = 0
};
/*
* Restrict the specified zone and all more restrictive zones
* to be below the specified pfn. May not be called after
* paging_init().
*/
void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit)
{
int i;
if (WARN_ON(zone_limits_final))
return;
for (i = zone; i >= 0; i--) {
if (max_zone_pfns[i] > pfn_limit)
max_zone_pfns[i] = pfn_limit;
}
}
static unsigned long max_zone_pfns[MAX_NR_ZONES];
/*
* Find the least restrictive zone that is entirely below the
@@ -324,11 +308,14 @@ void __init paging_init(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(long int)((top_of_ram - total_ram) >> 20));
#ifdef CONFIG_HIGHMEM
limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT);
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffffffUL >> PAGE_SHIFT);
#endif
limit_zone_pfn(TOP_ZONE, top_of_ram >> PAGE_SHIFT);
zone_limits_final = true;
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
max_zone_pfns[ZONE_HIGHMEM] = max_pfn;
#endif
free_area_init_nodes(max_zone_pfns);
mark_nonram_nosave();
@@ -503,7 +490,7 @@ EXPORT_SYMBOL(flush_icache_user_range);
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep)
{
#ifdef CONFIG_PPC_STD_MMU
#ifdef CONFIG_PPC_BOOK3S
/*
* We don't need to worry about _PAGE_PRESENT here because we are
* called with either mm->page_table_lock held or ptl lock held
@@ -541,7 +528,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
}
hash_preload(vma->vm_mm, address, is_exec, trap);
#endif /* CONFIG_PPC_STD_MMU */
#endif /* CONFIG_PPC_BOOK3S */
#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
&& defined(CONFIG_HUGETLB_PAGE)
if (is_vm_hugetlb_page(vma))

View File

@@ -15,6 +15,7 @@
#include <linux/sched/mm.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#if defined(CONFIG_PPC32)
static inline void switch_mm_pgdir(struct task_struct *tsk,
@@ -97,3 +98,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
switch_mmu_context(prev, next, tsk);
}
#ifdef CONFIG_PPC32
void arch_exit_mmap(struct mm_struct *mm)
{
void *frag = pte_frag_get(&mm->context);
if (frag)
pte_frag_destroy(frag);
}
#endif

View File

@@ -164,21 +164,6 @@ static void destroy_contexts(mm_context_t *ctx)
}
}
static void pte_frag_destroy(void *pte_frag)
{
int count;
struct page *page;
page = virt_to_page(pte_frag);
/* drop all the pending references */
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
pgtable_page_dtor(page);
__free_page(page);
}
}
static void pmd_frag_destroy(void *pmd_frag)
{
int count;

View File

@@ -36,6 +36,8 @@ struct mm_iommu_table_group_mem_t {
u64 ua; /* userspace address */
u64 entries; /* number of entries in hpas[] */
u64 *hpas; /* vmalloc'ed */
#define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1)
u64 dev_hpa; /* Device memory base address */
};
static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
@@ -126,7 +128,8 @@ static int mm_iommu_move_page_from_cma(struct page *page)
return 0;
}
long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
unsigned long entries, unsigned long dev_hpa,
struct mm_iommu_table_group_mem_t **pmem)
{
struct mm_iommu_table_group_mem_t *mem;
@@ -140,12 +143,6 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
next) {
if ((mem->ua == ua) && (mem->entries == entries)) {
++mem->used;
*pmem = mem;
goto unlock_exit;
}
/* Overlap? */
if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
(ua < (mem->ua +
@@ -156,11 +153,13 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
}
ret = mm_iommu_adjust_locked_vm(mm, entries, true);
if (ret)
goto unlock_exit;
if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
ret = mm_iommu_adjust_locked_vm(mm, entries, true);
if (ret)
goto unlock_exit;
locked_entries = entries;
locked_entries = entries;
}
mem = kzalloc(sizeof(*mem), GFP_KERNEL);
if (!mem) {
@@ -168,6 +167,13 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
goto unlock_exit;
}
if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
mem->dev_hpa = dev_hpa;
goto good_exit;
}
mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;
/*
* For a starting point for a maximum page size calculation
* we use @ua and @entries natural alignment to allow IOMMU pages
@@ -236,6 +242,7 @@ populate:
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
good_exit:
atomic64_set(&mem->mapped, 1);
mem->used = 1;
mem->ua = ua;
@@ -252,13 +259,31 @@ unlock_exit:
return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);
long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem)
{
return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);
long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
unsigned long entries, unsigned long dev_hpa,
struct mm_iommu_table_group_mem_t **pmem)
{
return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
long i;
struct page *page = NULL;
if (!mem->hpas)
return;
for (i = 0; i < mem->entries; ++i) {
if (!mem->hpas[i])
continue;
@@ -300,6 +325,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
long ret = 0;
unsigned long entries, dev_hpa;
mutex_lock(&mem_list_mutex);
@@ -321,9 +347,12 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
}
/* @mapped became 0 so now mappings are disabled, release the region */
entries = mem->entries;
dev_hpa = mem->dev_hpa;
mm_iommu_release(mem);
mm_iommu_adjust_locked_vm(mm, mem->entries, false);
if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
mm_iommu_adjust_locked_vm(mm, entries, false);
unlock_exit:
mutex_unlock(&mem_list_mutex);
@@ -368,27 +397,32 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
return ret;
}
struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
unsigned long ua, unsigned long entries)
{
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
mutex_lock(&mem_list_mutex);
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
if ((mem->ua == ua) && (mem->entries == entries)) {
ret = mem;
++mem->used;
break;
}
}
mutex_unlock(&mem_list_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_find);
EXPORT_SYMBOL_GPL(mm_iommu_get);
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
u64 *va = &mem->hpas[entry];
u64 *va;
if (entry >= mem->entries)
return -EFAULT;
@@ -396,6 +430,12 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
if (pageshift > mem->pageshift)
return -EFAULT;
if (!mem->hpas) {
*hpa = mem->dev_hpa + (ua - mem->ua);
return 0;
}
va = &mem->hpas[entry];
*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
return 0;
@@ -406,7 +446,6 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
void *va = &mem->hpas[entry];
unsigned long *pa;
if (entry >= mem->entries)
@@ -415,7 +454,12 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
if (pageshift > mem->pageshift)
return -EFAULT;
pa = (void *) vmalloc_to_phys(va);
if (!mem->hpas) {
*hpa = mem->dev_hpa + (ua - mem->ua);
return 0;
}
pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
if (!pa)
return -EFAULT;
@@ -435,6 +479,9 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
if (!mem)
return;
if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
return;
entry = (ua - mem->ua) >> PAGE_SHIFT;
va = &mem->hpas[entry];
@@ -445,6 +492,33 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}
bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
unsigned int pageshift, unsigned long *size)
{
struct mm_iommu_table_group_mem_t *mem;
unsigned long end;
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
continue;
end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
if ((mem->dev_hpa <= hpa) && (hpa < end)) {
/*
* Since the IOMMU page size might be bigger than
* PAGE_SIZE, the amount of preregistered memory
* starting from @hpa might be smaller than 1<<pageshift
* and the caller needs to distinguish this situation.
*/
*size = min(1UL << pageshift, end - hpa);
return true;
}
}
return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
if (atomic64_inc_not_zero(&mem->mapped))

View File

@@ -372,7 +372,6 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
pr_hard("initing context for mm @%p\n", mm);
#ifdef CONFIG_PPC_MM_SLICES
/*
* We have MMU_NO_CONTEXT set to be ~0. Hence check
* explicitly against context.id == 0. This ensures that we properly
@@ -382,9 +381,9 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
*/
if (mm->context.id == 0)
slice_init_new_context_exec(mm);
#endif
mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0;
pte_frag_set(&mm->context, NULL);
return 0;
}
@@ -487,4 +486,3 @@ void __init mmu_context_init(void)
next_context = FIRST_CONTEXT;
nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
}

View File

@@ -155,7 +155,7 @@ struct tlbcam {
};
#endif
#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx)
#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx)
/* 6xx have BATS */
/* FSL_BOOKE have TLBCAM */
/* 8xx have LTLB */

View File

@@ -1475,7 +1475,7 @@ static int dt_update_callback(struct notifier_block *nb,
switch (action) {
case OF_RECONFIG_UPDATE_PROPERTY:
if (!of_prop_cmp(update->dn->type, "cpu") &&
if (of_node_is_type(update->dn, "cpu") &&
!of_prop_cmp(update->prop->name, "ibm,associativity")) {
u32 core_id;
of_property_read_u32(update->dn, "reg", &core_id);

View File

@@ -244,6 +244,9 @@ static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
{
void *pmd_frag, *ret;
if (PMD_FRAG_NR == 1)
return NULL;
spin_lock(&mm->page_table_lock);
ret = mm->context.pmd_frag;
if (ret) {
@@ -322,91 +325,6 @@ void pmd_fragment_free(unsigned long *pmd)
}
}
static pte_t *get_pte_from_cache(struct mm_struct *mm)
{
void *pte_frag, *ret;
spin_lock(&mm->page_table_lock);
ret = mm->context.pte_frag;
if (ret) {
pte_frag = ret + PTE_FRAG_SIZE;
/*
* If we have taken up all the fragments mark PTE page NULL
*/
if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
pte_frag = NULL;
mm->context.pte_frag = pte_frag;
}
spin_unlock(&mm->page_table_lock);
return (pte_t *)ret;
}
static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
{
void *ret = NULL;
struct page *page;
if (!kernel) {
page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
if (!page)
return NULL;
if (!pgtable_page_ctor(page)) {
__free_page(page);
return NULL;
}
} else {
page = alloc_page(PGALLOC_GFP);
if (!page)
return NULL;
}
atomic_set(&page->pt_frag_refcount, 1);
ret = page_address(page);
/*
* if we support only one fragment just return the
* allocated page.
*/
if (PTE_FRAG_NR == 1)
return ret;
spin_lock(&mm->page_table_lock);
/*
* If we find pgtable_page set, we return
* the allocated page with single fragement
* count.
*/
if (likely(!mm->context.pte_frag)) {
atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
mm->context.pte_frag = ret + PTE_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
return (pte_t *)ret;
}
pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
{
pte_t *pte;
pte = get_pte_from_cache(mm);
if (pte)
return pte;
return __alloc_for_ptecache(mm, kernel);
}
void pte_fragment_free(unsigned long *table, int kernel)
{
struct page *page = virt_to_page(table);
BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
if (atomic_dec_and_test(&page->pt_frag_refcount)) {
if (!kernel)
pgtable_page_dtor(page);
__free_page(page);
}
}
static inline void pgtable_free(void *table, int index)
{
switch (index) {

View File

@@ -0,0 +1,119 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Handling Page Tables through page fragments
*
*/
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
void pte_frag_destroy(void *pte_frag)
{
int count;
struct page *page;
page = virt_to_page(pte_frag);
/* drop all the pending references */
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
pgtable_page_dtor(page);
__free_page(page);
}
}
static pte_t *get_pte_from_cache(struct mm_struct *mm)
{
void *pte_frag, *ret;
if (PTE_FRAG_NR == 1)
return NULL;
spin_lock(&mm->page_table_lock);
ret = pte_frag_get(&mm->context);
if (ret) {
pte_frag = ret + PTE_FRAG_SIZE;
/*
* If we have taken up all the fragments mark PTE page NULL
*/
if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
pte_frag = NULL;
pte_frag_set(&mm->context, pte_frag);
}
spin_unlock(&mm->page_table_lock);
return (pte_t *)ret;
}
static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
{
void *ret = NULL;
struct page *page;
if (!kernel) {
page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
if (!page)
return NULL;
if (!pgtable_page_ctor(page)) {
__free_page(page);
return NULL;
}
} else {
page = alloc_page(PGALLOC_GFP);
if (!page)
return NULL;
}
atomic_set(&page->pt_frag_refcount, 1);
ret = page_address(page);
/*
* if we support only one fragment just return the
* allocated page.
*/
if (PTE_FRAG_NR == 1)
return ret;
spin_lock(&mm->page_table_lock);
/*
* If we find pgtable_page set, we return
* the allocated page with single fragement
* count.
*/
if (likely(!pte_frag_get(&mm->context))) {
atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
pte_frag_set(&mm->context, ret + PTE_FRAG_SIZE);
}
spin_unlock(&mm->page_table_lock);
return (pte_t *)ret;
}
pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
{
pte_t *pte;
pte = get_pte_from_cache(mm);
if (pte)
return pte;
return __alloc_for_ptecache(mm, kernel);
}
void pte_fragment_free(unsigned long *table, int kernel)
{
struct page *page = virt_to_page(table);
BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
if (atomic_dec_and_test(&page->pt_frag_refcount)) {
if (!kernel)
pgtable_page_dtor(page);
__free_page(page);
}
}

View File

@@ -74,7 +74,7 @@ static struct page *maybe_pte_to_page(pte_t pte)
* support falls into the same category.
*/
static pte_t set_pte_filter(pte_t pte)
static pte_t set_pte_filter_hash(pte_t pte)
{
if (radix_enabled())
return pte;
@@ -93,14 +93,12 @@ static pte_t set_pte_filter(pte_t pte)
return pte;
}
static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
int dirty)
{
return pte;
}
#else /* CONFIG_PPC_BOOK3S */
static pte_t set_pte_filter_hash(pte_t pte) { return pte; }
#endif /* CONFIG_PPC_BOOK3S */
/* Embedded type MMU with HW exec support. This is a bit more complicated
* as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
* instead we "filter out" the exec permission for non clean pages.
@@ -109,6 +107,9 @@ static pte_t set_pte_filter(pte_t pte)
{
struct page *pg;
if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
return set_pte_filter_hash(pte);
/* No exec permission in the first place, move on */
if (!pte_exec(pte) || !pte_looks_normal(pte))
return pte;
@@ -138,6 +139,9 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
{
struct page *pg;
if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
return pte;
/* So here, we only care about exec faults, as we use them
* to recover lost _PAGE_EXEC and perform I$/D$ coherency
* if necessary. Also if _PAGE_EXEC is already set, same deal,
@@ -172,8 +176,6 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
return pte_mkexec(pte);
}
#endif /* CONFIG_PPC_BOOK3S */
/*
* set_pte stores a linux PTE into the linux page table.
*/
@@ -221,9 +223,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
}
#ifdef CONFIG_HUGETLB_PAGE
extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
{
#ifdef HUGETLB_NEED_PRELOAD
/*

View File

@@ -45,32 +45,15 @@ extern char etext[], _stext[], _sinittext[], _einittext[];
__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte;
if (!slab_is_available())
return memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
if (slab_is_available()) {
pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
} else {
pte = __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
if (pte)
clear_page(pte);
}
return pte;
return (pte_t *)pte_fragment_alloc(mm, address, 1);
}
pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *ptepage;
gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT;
ptepage = alloc_pages(flags, 0);
if (!ptepage)
return NULL;
if (!pgtable_page_ctor(ptepage)) {
__free_page(ptepage);
return NULL;
}
return ptepage;
return (pgtable_t)pte_fragment_alloc(mm, address, 0);
}
void __iomem *
@@ -160,7 +143,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
* Don't allow anybody to remap normal RAM that we're using.
* mem_init() sets high_memory so only do the check after that.
*/
if (slab_is_available() && (p < virt_to_phys(high_memory)) &&
if (slab_is_available() && p <= virt_to_phys(high_memory - 1) &&
page_is_ram(__phys_to_pfn(p))) {
printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
(unsigned long long)p, __builtin_return_address(0));
@@ -260,7 +243,7 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
ktext = ((char *)v >= _stext && (char *)v < etext) ||
((char *)v >= _sinittext && (char *)v < _einittext);
map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL);
#ifdef CONFIG_PPC_STD_MMU_32
#ifdef CONFIG_PPC_BOOK3S_32
if (ktext)
hash_preload(&init_mm, v, false, 0x300);
#endif

View File

@@ -6,20 +6,21 @@
*/
#include <asm/mman.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <linux/pkeys.h>
#include <linux/of_device.h>
DEFINE_STATIC_KEY_TRUE(pkey_disabled);
bool pkey_execute_disable_supported;
int pkeys_total; /* Total pkeys as per device tree */
bool pkeys_devtree_defined; /* pkey property exported by device tree */
u32 initial_allocation_mask; /* Bits set for the initially allocated keys */
u32 reserved_allocation_mask; /* Bits set for reserved keys */
u64 pkey_amr_mask; /* Bits in AMR not to be touched */
u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */
int execute_only_key = 2;
static bool pkey_execute_disable_supported;
static bool pkeys_devtree_defined; /* property exported by device tree */
static u64 pkey_amr_mask; /* Bits in AMR not to be touched */
static u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
static u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */
static int execute_only_key = 2;
#define AMR_BITS_PER_PKEY 2
#define AMR_RD_BIT 0x1UL
@@ -57,7 +58,7 @@ static inline bool pkey_mmu_enabled(void)
return cpu_has_feature(CPU_FTR_PKEY);
}
int pkey_initialize(void)
static int pkey_initialize(void)
{
int os_reserved, i;
@@ -414,3 +415,13 @@ bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
return pkey_access_permitted(vma_pkey(vma), write, execute);
}
void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm)
{
if (static_branch_likely(&pkey_disabled))
return;
/* Duplicate the oldmm pkey state in mm: */
mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm);
mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;
}

View File

@@ -31,6 +31,7 @@
#include <asm/prom.h>
#include <asm/mmu.h>
#include <asm/machdep.h>
#include <asm/code-patching.h>
#include "mmu_decl.h"
@@ -52,7 +53,7 @@ struct batrange { /* stores address ranges mapped by BATs */
phys_addr_t v_block_mapped(unsigned long va)
{
int b;
for (b = 0; b < 4; ++b)
for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
return bat_addrs[b].phys + (va - bat_addrs[b].start);
return 0;
@@ -64,7 +65,7 @@ phys_addr_t v_block_mapped(unsigned long va)
unsigned long p_block_mapped(phys_addr_t pa)
{
int b;
for (b = 0; b < 4; ++b)
for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
if (pa >= bat_addrs[b].phys
&& pa < (bat_addrs[b].limit-bat_addrs[b].start)
+bat_addrs[b].phys)
@@ -182,22 +183,8 @@ void __init MMU_init_hw(void)
unsigned int hmask, mb, mb2;
unsigned int n_hpteg, lg_n_hpteg;
extern unsigned int hash_page_patch_A[];
extern unsigned int hash_page_patch_B[], hash_page_patch_C[];
extern unsigned int hash_page[];
extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[];
if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
/*
* Put a blr (procedure return) instruction at the
* start of hash_page, since we can still get DSI
* exceptions on a 603.
*/
hash_page[0] = 0x4e800020;
flush_icache_range((unsigned long) &hash_page[0],
(unsigned long) &hash_page[1]);
if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
return;
}
if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
@@ -244,31 +231,19 @@ void __init MMU_init_hw(void)
if (lg_n_hpteg > 16)
mb2 = 16 - LG_HPTEG_SIZE;
hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff)
| ((unsigned int)(Hash) >> 16);
hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6);
hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6);
hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask;
hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask;
/*
* Ensure that the locations we've patched have been written
* out from the data cache and invalidated in the instruction
* cache, on those machines with split caches.
*/
flush_icache_range((unsigned long) &hash_page_patch_A[0],
(unsigned long) &hash_page_patch_C[1]);
modify_instruction_site(&patch__hash_page_A0, 0xffff, (unsigned int)Hash >> 16);
modify_instruction_site(&patch__hash_page_A1, 0x7c0, mb << 6);
modify_instruction_site(&patch__hash_page_A2, 0x7c0, mb2 << 6);
modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
modify_instruction_site(&patch__hash_page_C, 0xffff, hmask);
/*
* Patch up the instructions in hashtable.S:flush_hash_page
*/
flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff)
| ((unsigned int)(Hash) >> 16);
flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6);
flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6);
flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask;
flush_icache_range((unsigned long) &flush_hash_patch_A[0],
(unsigned long) &flush_hash_patch_B[1]);
modify_instruction_site(&patch__flush_hash_A0, 0xffff, (unsigned int)Hash >> 16);
modify_instruction_site(&patch__flush_hash_A1, 0x7c0, mb << 6);
modify_instruction_site(&patch__flush_hash_A2, 0x7c0, mb2 << 6);
modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
}

View File

@@ -70,6 +70,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
std r15,EX_TLB_R15(r12)
std r10,EX_TLB_CR(r12)
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
mfspr r11, SPRN_SRR1
andi. r10,r11,MSR_PR
beq 1f
BTB_FLUSH(r10)
1:
END_BTB_FLUSH_SECTION
std r7,EX_TLB_R7(r12)
#endif
TLB_MISS_PROLOG_STATS