Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - Update and clean up x86 fault handling, by Andy Lutomirski.

   - Drop usage of __flush_tlb_all() in kernel_physical_mapping_init()
     and related fallout, by Dan Williams.

   - CPA cleanups and reorganization by Peter Zijlstra: simplify the
     flow and remove a few warts.

   - Other misc cleanups"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (29 commits)
  x86/mm/dump_pagetables: Use DEFINE_SHOW_ATTRIBUTE()
  x86/mm/cpa: Rename @addrinarray to @numpages
  x86/mm/cpa: Better use CLFLUSHOPT
  x86/mm/cpa: Fold cpa_flush_range() and cpa_flush_array() into a single cpa_flush() function
  x86/mm/cpa: Make cpa_data::numpages invariant
  x86/mm/cpa: Optimize cpa_flush_array() TLB invalidation
  x86/mm/cpa: Simplify the code after making cpa->vaddr invariant
  x86/mm/cpa: Make cpa_data::vaddr invariant
  x86/mm/cpa: Add __cpa_addr() helper
  x86/mm/cpa: Add ARRAY and PAGES_ARRAY selftests
  x86/mm: Drop usage of __flush_tlb_all() in kernel_physical_mapping_init()
  x86/mm: Validate kernel_physical_mapping_init() PTE population
  generic/pgtable: Introduce set_pte_safe()
  generic/pgtable: Introduce {p4d,pgd}_same()
  generic/pgtable: Make {pmd, pud}_same() unconditionally available
  x86/fault: Clean up the page fault oops decoder a bit
  x86/fault: Decode page fault OOPSes better
  x86/vsyscall/64: Use X86_PF constants in the simulated #PF error code
  x86/oops: Show the correct CS value in show_regs()
  x86/fault: Don't try to recover from an implicit supervisor access
  ...
This commit is contained in:
Linus Torvalds
2018-12-26 18:08:18 -08:00
16 changed files with 405 additions and 355 deletions

View File

@@ -26,6 +26,8 @@
#include <asm/pat.h>
#include <asm/set_memory.h>
#include "mm_internal.h"
/*
* The current flushing context - we pass it instead of 5 arguments:
*/
@@ -35,11 +37,11 @@ struct cpa_data {
pgprot_t mask_set;
pgprot_t mask_clr;
unsigned long numpages;
int flags;
unsigned long curpage;
unsigned long pfn;
unsigned force_split : 1,
unsigned int flags;
unsigned int force_split : 1,
force_static_prot : 1;
int curpage;
struct page **pages;
};
@@ -228,19 +230,28 @@ static bool __cpa_pfn_in_highmap(unsigned long pfn)
#endif
static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
{
if (cpa->flags & CPA_PAGES_ARRAY) {
struct page *page = cpa->pages[idx];
if (unlikely(PageHighMem(page)))
return 0;
return (unsigned long)page_address(page);
}
if (cpa->flags & CPA_ARRAY)
return cpa->vaddr[idx];
return *cpa->vaddr + idx * PAGE_SIZE;
}
/*
* Flushing functions
*/
/**
* clflush_cache_range - flush a cache range with clflush
* @vaddr: virtual start address
* @size: number of bytes to flush
*
* clflushopt is an unordered instruction which needs fencing with mfence or
* sfence to avoid ordering issues.
*/
void clflush_cache_range(void *vaddr, unsigned int size)
static void clflush_cache_range_opt(void *vaddr, unsigned int size)
{
const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
@@ -249,11 +260,22 @@ void clflush_cache_range(void *vaddr, unsigned int size)
if (p >= vend)
return;
mb();
for (; p < vend; p += clflush_size)
clflushopt(p);
}
/**
* clflush_cache_range - flush a cache range with clflush
* @vaddr: virtual start address
* @size: number of bytes to flush
*
* CLFLUSHOPT is an unordered instruction which needs fencing with MFENCE or
* SFENCE to avoid ordering issues.
*/
void clflush_cache_range(void *vaddr, unsigned int size)
{
mb();
clflush_cache_range_opt(vaddr, size);
mb();
}
EXPORT_SYMBOL_GPL(clflush_cache_range);
@@ -285,87 +307,49 @@ static void cpa_flush_all(unsigned long cache)
on_each_cpu(__cpa_flush_all, (void *) cache, 1);
}
static bool __inv_flush_all(int cache)
void __cpa_flush_tlb(void *data)
{
struct cpa_data *cpa = data;
unsigned int i;
for (i = 0; i < cpa->numpages; i++)
__flush_tlb_one_kernel(__cpa_addr(cpa, i));
}
static void cpa_flush(struct cpa_data *data, int cache)
{
struct cpa_data *cpa = data;
unsigned int i;
BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
cpa_flush_all(cache);
return true;
return;
}
return false;
}
static void cpa_flush_range(unsigned long start, int numpages, int cache)
{
unsigned int i, level;
unsigned long addr;
WARN_ON(PAGE_ALIGN(start) != start);
if (__inv_flush_all(cache))
return;
flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
if (cpa->numpages <= tlb_single_page_flush_ceiling)
on_each_cpu(__cpa_flush_tlb, cpa, 1);
else
flush_tlb_all();
if (!cache)
return;
/*
* We only need to flush on one CPU,
* clflush is a MESI-coherent instruction that
* will cause all other CPUs to flush the same
* cachelines:
*/
for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
mb();
for (i = 0; i < cpa->numpages; i++) {
unsigned long addr = __cpa_addr(cpa, i);
unsigned int level;
pte_t *pte = lookup_address(addr, &level);
/*
* Only flush present addresses:
*/
if (pte && (pte_val(*pte) & _PAGE_PRESENT))
clflush_cache_range((void *) addr, PAGE_SIZE);
}
}
static void cpa_flush_array(unsigned long baddr, unsigned long *start,
int numpages, int cache,
int in_flags, struct page **pages)
{
unsigned int i, level;
if (__inv_flush_all(cache))
return;
flush_tlb_all();
if (!cache)
return;
/*
* We only need to flush on one CPU,
* clflush is a MESI-coherent instruction that
* will cause all other CPUs to flush the same
* cachelines:
*/
for (i = 0; i < numpages; i++) {
unsigned long addr;
pte_t *pte;
if (in_flags & CPA_PAGES_ARRAY)
addr = (unsigned long)page_address(pages[i]);
else
addr = start[i];
pte = lookup_address(addr, &level);
/*
* Only flush present addresses:
*/
if (pte && (pte_val(*pte) & _PAGE_PRESENT))
clflush_cache_range((void *)addr, PAGE_SIZE);
clflush_cache_range_opt((void *)addr, PAGE_SIZE);
}
mb();
}
static bool overlaps(unsigned long r1_start, unsigned long r1_end,
@@ -1476,15 +1460,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
unsigned int level;
pte_t *kpte, old_pte;
if (cpa->flags & CPA_PAGES_ARRAY) {
struct page *page = cpa->pages[cpa->curpage];
if (unlikely(PageHighMem(page)))
return 0;
address = (unsigned long)page_address(page);
} else if (cpa->flags & CPA_ARRAY)
address = cpa->vaddr[cpa->curpage];
else
address = *cpa->vaddr;
address = __cpa_addr(cpa, cpa->curpage);
repeat:
kpte = _lookup_address_cpa(cpa, address, &level);
if (!kpte)
@@ -1565,22 +1541,14 @@ static int cpa_process_alias(struct cpa_data *cpa)
* No need to redo, when the primary call touched the direct
* mapping already:
*/
if (cpa->flags & CPA_PAGES_ARRAY) {
struct page *page = cpa->pages[cpa->curpage];
if (unlikely(PageHighMem(page)))
return 0;
vaddr = (unsigned long)page_address(page);
} else if (cpa->flags & CPA_ARRAY)
vaddr = cpa->vaddr[cpa->curpage];
else
vaddr = *cpa->vaddr;
vaddr = __cpa_addr(cpa, cpa->curpage);
if (!(within(vaddr, PAGE_OFFSET,
PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
alias_cpa = *cpa;
alias_cpa.vaddr = &laddr;
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
ret = __change_page_attr_set_clr(&alias_cpa, 0);
if (ret)
@@ -1600,6 +1568,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
alias_cpa = *cpa;
alias_cpa.vaddr = &temp_cpa_vaddr;
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
/*
* The high mapping range is imprecise, so ignore the
@@ -1615,14 +1584,15 @@ static int cpa_process_alias(struct cpa_data *cpa)
static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
{
unsigned long numpages = cpa->numpages;
int ret;
unsigned long rempages = numpages;
int ret = 0;
while (numpages) {
while (rempages) {
/*
* Store the remaining nr of pages for the large page
* preservation check.
*/
cpa->numpages = numpages;
cpa->numpages = rempages;
/* for array changes, we can't use large page */
if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
cpa->numpages = 1;
@@ -1633,12 +1603,12 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
if (!debug_pagealloc_enabled())
spin_unlock(&cpa_lock);
if (ret)
return ret;
goto out;
if (checkalias) {
ret = cpa_process_alias(cpa);
if (ret)
return ret;
goto out;
}
/*
@@ -1646,15 +1616,15 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
* CPA operation. Either a large page has been
* preserved or a single page update happened.
*/
BUG_ON(cpa->numpages > numpages || !cpa->numpages);
numpages -= cpa->numpages;
if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
cpa->curpage++;
else
*cpa->vaddr += cpa->numpages * PAGE_SIZE;
BUG_ON(cpa->numpages > rempages || !cpa->numpages);
rempages -= cpa->numpages;
cpa->curpage += cpa->numpages;
}
return 0;
out:
/* Restore the original numpages */
cpa->numpages = numpages;
return ret;
}
/*
@@ -1687,7 +1657,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
{
struct cpa_data cpa;
int ret, cache, checkalias;
unsigned long baddr = 0;
memset(&cpa, 0, sizeof(cpa));
@@ -1721,11 +1690,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
*/
WARN_ON_ONCE(1);
}
/*
* Save address for cache flush. *addr is modified in the call
* to __change_page_attr_set_clr() below.
*/
baddr = make_addr_canonical_again(*addr);
}
/* Must avoid aliasing mappings in the highmem code */
@@ -1773,13 +1737,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
goto out;
}
if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
cpa_flush_array(baddr, addr, numpages, cache,
cpa.flags, pages);
} else {
cpa_flush_range(baddr, numpages, cache);
}
cpa_flush(&cpa, cache);
out:
return ret;
}
@@ -1850,14 +1808,14 @@ out_err:
}
EXPORT_SYMBOL(set_memory_uc);
static int _set_memory_array(unsigned long *addr, int addrinarray,
static int _set_memory_array(unsigned long *addr, int numpages,
enum page_cache_mode new_type)
{
enum page_cache_mode set_type;
int i, j;
int ret;
for (i = 0; i < addrinarray; i++) {
for (i = 0; i < numpages; i++) {
ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
new_type, NULL);
if (ret)
@@ -1868,11 +1826,11 @@ static int _set_memory_array(unsigned long *addr, int addrinarray,
set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
_PAGE_CACHE_MODE_UC_MINUS : new_type;
ret = change_page_attr_set(addr, addrinarray,
ret = change_page_attr_set(addr, numpages,
cachemode2pgprot(set_type), 1);
if (!ret && new_type == _PAGE_CACHE_MODE_WC)
ret = change_page_attr_set_clr(addr, addrinarray,
ret = change_page_attr_set_clr(addr, numpages,
cachemode2pgprot(
_PAGE_CACHE_MODE_WC),
__pgprot(_PAGE_CACHE_MASK),
@@ -1889,36 +1847,34 @@ out_free:
return ret;
}
int set_memory_array_uc(unsigned long *addr, int addrinarray)
int set_memory_array_uc(unsigned long *addr, int numpages)
{
return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_UC_MINUS);
return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_UC_MINUS);
}
EXPORT_SYMBOL(set_memory_array_uc);
int set_memory_array_wc(unsigned long *addr, int addrinarray)
int set_memory_array_wc(unsigned long *addr, int numpages)
{
return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WC);
return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_WC);
}
EXPORT_SYMBOL(set_memory_array_wc);
int set_memory_array_wt(unsigned long *addr, int addrinarray)
int set_memory_array_wt(unsigned long *addr, int numpages)
{
return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WT);
return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_WT);
}
EXPORT_SYMBOL_GPL(set_memory_array_wt);
int _set_memory_wc(unsigned long addr, int numpages)
{
int ret;
unsigned long addr_copy = addr;
ret = change_page_attr_set(&addr, numpages,
cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
0);
if (!ret) {
ret = change_page_attr_set_clr(&addr_copy, numpages,
cachemode2pgprot(
_PAGE_CACHE_MODE_WC),
ret = change_page_attr_set_clr(&addr, numpages,
cachemode2pgprot(_PAGE_CACHE_MODE_WC),
__pgprot(_PAGE_CACHE_MASK),
0, 0, NULL);
}
@@ -1985,18 +1941,18 @@ int set_memory_wb(unsigned long addr, int numpages)
}
EXPORT_SYMBOL(set_memory_wb);
int set_memory_array_wb(unsigned long *addr, int addrinarray)
int set_memory_array_wb(unsigned long *addr, int numpages)
{
int i;
int ret;
/* WB cache mode is hard wired to all cache attribute bits being 0 */
ret = change_page_attr_clear(addr, addrinarray,
ret = change_page_attr_clear(addr, numpages,
__pgprot(_PAGE_CACHE_MASK), 1);
if (ret)
return ret;
for (i = 0; i < addrinarray; i++)
for (i = 0; i < numpages; i++)
free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);
return 0;
@@ -2066,7 +2022,6 @@ int set_memory_global(unsigned long addr, int numpages)
static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{
struct cpa_data cpa;
unsigned long start;
int ret;
/* Nothing to do if memory encryption is not active */
@@ -2077,8 +2032,6 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
addr &= PAGE_MASK;
start = addr;
memset(&cpa, 0, sizeof(cpa));
cpa.vaddr = &addr;
cpa.numpages = numpages;
@@ -2093,18 +2046,18 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
/*
* Before changing the encryption attribute, we need to flush caches.
*/
cpa_flush_range(start, numpages, 1);
cpa_flush(&cpa, 1);
ret = __change_page_attr_set_clr(&cpa, 1);
/*
* After changing the encryption attribute, we need to flush TLBs
* again in case any speculative TLB caching occurred (but no need
* to flush caches again). We could just use cpa_flush_all(), but
* in case TLB flushing gets optimized in the cpa_flush_range()
* path use the same logic as above.
* After changing the encryption attribute, we need to flush TLBs again
* in case any speculative TLB caching occurred (but no need to flush
* caches again). We could just use cpa_flush_all(), but in case TLB
* flushing gets optimized in the cpa_flush() path use the same logic
* as above.
*/
cpa_flush_range(start, numpages, 0);
cpa_flush(&cpa, 0);
return ret;
}
@@ -2129,7 +2082,7 @@ int set_pages_uc(struct page *page, int numpages)
}
EXPORT_SYMBOL(set_pages_uc);
static int _set_pages_array(struct page **pages, int addrinarray,
static int _set_pages_array(struct page **pages, int numpages,
enum page_cache_mode new_type)
{
unsigned long start;
@@ -2139,7 +2092,7 @@ static int _set_pages_array(struct page **pages, int addrinarray,
int free_idx;
int ret;
for (i = 0; i < addrinarray; i++) {
for (i = 0; i < numpages; i++) {
if (PageHighMem(pages[i]))
continue;
start = page_to_pfn(pages[i]) << PAGE_SHIFT;
@@ -2152,10 +2105,10 @@ static int _set_pages_array(struct page **pages, int addrinarray,
set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
_PAGE_CACHE_MODE_UC_MINUS : new_type;
ret = cpa_set_pages_array(pages, addrinarray,
ret = cpa_set_pages_array(pages, numpages,
cachemode2pgprot(set_type));
if (!ret && new_type == _PAGE_CACHE_MODE_WC)
ret = change_page_attr_set_clr(NULL, addrinarray,
ret = change_page_attr_set_clr(NULL, numpages,
cachemode2pgprot(
_PAGE_CACHE_MODE_WC),
__pgprot(_PAGE_CACHE_MASK),
@@ -2175,21 +2128,21 @@ err_out:
return -EINVAL;
}
int set_pages_array_uc(struct page **pages, int addrinarray)
int set_pages_array_uc(struct page **pages, int numpages)
{
return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_UC_MINUS);
return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_UC_MINUS);
}
EXPORT_SYMBOL(set_pages_array_uc);
int set_pages_array_wc(struct page **pages, int addrinarray)
int set_pages_array_wc(struct page **pages, int numpages)
{
return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WC);
return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WC);
}
EXPORT_SYMBOL(set_pages_array_wc);
int set_pages_array_wt(struct page **pages, int addrinarray)
int set_pages_array_wt(struct page **pages, int numpages)
{
return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WT);
return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT);
}
EXPORT_SYMBOL_GPL(set_pages_array_wt);
@@ -2201,7 +2154,7 @@ int set_pages_wb(struct page *page, int numpages)
}
EXPORT_SYMBOL(set_pages_wb);
int set_pages_array_wb(struct page **pages, int addrinarray)
int set_pages_array_wb(struct page **pages, int numpages)
{
int retval;
unsigned long start;
@@ -2209,12 +2162,12 @@ int set_pages_array_wb(struct page **pages, int addrinarray)
int i;
/* WB cache mode is hard wired to all cache attribute bits being 0 */
retval = cpa_clear_pages_array(pages, addrinarray,
retval = cpa_clear_pages_array(pages, numpages,
__pgprot(_PAGE_CACHE_MASK));
if (retval)
return retval;
for (i = 0; i < addrinarray; i++) {
for (i = 0; i < numpages; i++) {
if (PageHighMem(pages[i]))
continue;
start = page_to_pfn(pages[i]) << PAGE_SHIFT;