Merge branch 'akpm' (patches from Andrew)

Merge misc updates from Andrew Morton:

 - a few MM hotfixes

 - kthread, tools, scripts, ntfs and ocfs2

 - some of MM

Subsystems affected by this patch series: kthread, tools, scripts, ntfs,
ocfs2 and mm (hotfixes, pagealloc, slab-generic, slab, slub, kcsan,
debug, pagecache, gup, swap, shmem, memcg, pagemap, mremap, mincore,
sparsemem, vmalloc, kasan, pagealloc, hugetlb and vmscan).

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (162 commits)
  mm: vmscan: consistent update to pgrefill
  mm/vmscan.c: fix typo
  khugepaged: khugepaged_test_exit() check mmget_still_valid()
  khugepaged: retract_page_tables() remember to test exit
  khugepaged: collapse_pte_mapped_thp() protect the pmd lock
  khugepaged: collapse_pte_mapped_thp() flush the right range
  mm/hugetlb: fix calculation of adjust_range_if_pmd_sharing_possible
  mm: thp: replace HTTP links with HTTPS ones
  mm/page_alloc: fix memalloc_nocma_{save/restore} APIs
  mm/page_alloc.c: skip setting nodemask when we are in interrupt
  mm/page_alloc: fallbacks at most has 3 elements
  mm/page_alloc: silence a KASAN false positive
  mm/page_alloc.c: remove unnecessary end_bitidx for [set|get]_pfnblock_flags_mask()
  mm/page_alloc.c: simplify pageblock bitmap access
  mm/page_alloc.c: extract the common part in pfn_to_bitidx()
  mm/page_alloc.c: replace the definition of NR_MIGRATETYPE_BITS with PB_migratetype_bits
  mm/shuffle: remove dynamic reconfiguration
  mm/memory_hotplug: document why shuffle_zone() is relevant
  mm/page_alloc: remove nr_free_pagecache_pages()
  mm: remove vm_total_pages
  ...
Linus Torvalds
2020-08-07 11:39:33 -07:00
396 changed files with 4906 additions and 3437 deletions

@@ -11,7 +11,7 @@ crosstests.conf - this config shows an example of testing a git repo against
 lots of different architectures. It only does build tests, but makes
 it easy to compile test different archs. You can download the arch
 cross compilers from:
-http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+https://kernel.org/pub/tools/crosstool/files/bin/x86_64/
 test.conf - A generic example of a config. This is based on an actual config
 used to perform real testing.

@@ -3,7 +3,7 @@
 #
 # In this config, it is expected that the tool chains from:
 #
-#   http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+#   https://kernel.org/pub/tools/crosstool/files/bin/x86_64/
 #
 # running on a x86_64 system have been downloaded and installed into:
 #

@@ -32,6 +32,7 @@ TARGETS += lkdtm
 TARGETS += membarrier
 TARGETS += memfd
 TARGETS += memory-hotplug
+TARGETS += mincore
 TARGETS += mount
 TARGETS += mqueue
 TARGETS += net

@@ -2,3 +2,4 @@
 test_memcontrol
 test_core
 test_freezer
+test_kmem

@@ -6,11 +6,13 @@ all:
 TEST_FILES := with_stress.sh
 TEST_PROGS := test_stress.sh
 TEST_GEN_PROGS = test_memcontrol
+TEST_GEN_PROGS += test_kmem
 TEST_GEN_PROGS += test_core
 TEST_GEN_PROGS += test_freezer
 
 include ../lib.mk
 
 $(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h
 $(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
 $(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
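
(With test_kmem added to TEST_GEN_PROGS and its cgroup_util.c dependency declared, the new test builds and runs like the existing cgroup selftests, e.g. via "make -C tools/testing/selftests TARGETS=cgroup run_tests".)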

@@ -106,7 +106,7 @@ int cg_read_strcmp(const char *cgroup, const char *control,
 	/* Handle the case of comparing against empty string */
 	if (!expected)
-		size = 32;
+		return -1;
 	else
 		size = strlen(expected) + 1;
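
(The change above makes a NULL expected string an explicit error: previously cg_read_strcmp() would size a 32-byte read buffer for it and then, presumably, go on to compare against the NULL pattern further down; now the caller gets -1 up front.)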

@@ -0,0 +1,382 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE

#include <linux/limits.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/wait.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <pthread.h>

#include "../kselftest.h"
#include "cgroup_util.h"

static int alloc_dcache(const char *cgroup, void *arg)
{
	unsigned long i;
	struct stat st;
	char buf[128];

	for (i = 0; i < (unsigned long)arg; i++) {
		snprintf(buf, sizeof(buf),
			 "/something-non-existent-with-a-long-name-%64lu-%d",
			 i, getpid());
		stat(buf, &st);
	}

	return 0;
}

/*
 * This test allocates 100000 negative dentries with long names.
 * Then it checks that "slab" in memory.stat is larger than 1M.
 * Then it sets memory.high to 1M and checks that at least 1/2
 * of slab memory has been reclaimed.
 */
static int test_kmem_basic(const char *root)
{
	int ret = KSFT_FAIL;
	char *cg = NULL;
	long slab0, slab1, current;

	cg = cg_name(root, "kmem_basic_test");
	if (!cg)
		goto cleanup;

	if (cg_create(cg))
		goto cleanup;

	if (cg_run(cg, alloc_dcache, (void *)100000))
		goto cleanup;

	slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
	if (slab0 < (1 << 20))
		goto cleanup;

	cg_write(cg, "memory.high", "1M");
	slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
	if (slab1 <= 0)
		goto cleanup;

	current = cg_read_long(cg, "memory.current");
	if (current <= 0)
		goto cleanup;

	if (slab1 < slab0 / 2 && current < slab0 / 2)
		ret = KSFT_PASS;

cleanup:
	cg_destroy(cg);
	free(cg);

	return ret;
}

static void *alloc_kmem_fn(void *arg)
{
	alloc_dcache(NULL, (void *)100);
	return NULL;
}

static int alloc_kmem_smp(const char *cgroup, void *arg)
{
	int nr_threads = 2 * get_nprocs();
	pthread_t *tinfo;
	unsigned long i;
	int ret = -1;

	tinfo = calloc(nr_threads, sizeof(pthread_t));
	if (tinfo == NULL)
		return -1;

	for (i = 0; i < nr_threads; i++) {
		if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
				   (void *)i)) {
			free(tinfo);
			return -1;
		}
	}

	for (i = 0; i < nr_threads; i++) {
		ret = pthread_join(tinfo[i], NULL);
		if (ret)
			break;
	}

	free(tinfo);
	return ret;
}

static int cg_run_in_subcgroups(const char *parent,
				int (*fn)(const char *cgroup, void *arg),
				void *arg, int times)
{
	char *child;
	int i;

	for (i = 0; i < times; i++) {
		child = cg_name_indexed(parent, "child", i);
		if (!child)
			return -1;

		if (cg_create(child)) {
			cg_destroy(child);
			free(child);
			return -1;
		}

		if (cg_run(child, fn, NULL)) {
			cg_destroy(child);
			free(child);
			return -1;
		}

		cg_destroy(child);
		free(child);
	}

	return 0;
}

/*
 * The test creates and destroys a large number of cgroups. In each cgroup it
 * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
 * threads. Then it checks the sanity of numbers on the parent level:
 * the total size of the cgroups should be roughly equal to
 * anon + file + slab + kernel_stack.
 */
static int test_kmem_memcg_deletion(const char *root)
{
	long current, slab, anon, file, kernel_stack, sum;
	int ret = KSFT_FAIL;
	char *parent;

	parent = cg_name(root, "kmem_memcg_deletion_test");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
		goto cleanup;

	current = cg_read_long(parent, "memory.current");
	slab = cg_read_key_long(parent, "memory.stat", "slab ");
	anon = cg_read_key_long(parent, "memory.stat", "anon ");
	file = cg_read_key_long(parent, "memory.stat", "file ");
	kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
	if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
	    kernel_stack < 0)
		goto cleanup;

	sum = slab + anon + file + kernel_stack;
	if (abs(sum - current) < 4096 * 32 * 2 * get_nprocs()) {
		ret = KSFT_PASS;
	} else {
		printf("memory.current = %ld\n", current);
		printf("slab + anon + file + kernel_stack = %ld\n", sum);
		printf("slab = %ld\n", slab);
		printf("anon = %ld\n", anon);
		printf("file = %ld\n", file);
		printf("kernel_stack = %ld\n", kernel_stack);
	}

cleanup:
	cg_destroy(parent);
	free(parent);

	return ret;
}

/*
 * The test reads the entire /proc/kpagecgroup. If the operation completed
 * successfully (and the kernel didn't panic), the test is treated as passed.
 */
static int test_kmem_proc_kpagecgroup(const char *root)
{
	unsigned long buf[128];
	int ret = KSFT_FAIL;
	ssize_t len;
	int fd;

	fd = open("/proc/kpagecgroup", O_RDONLY);
	if (fd < 0)
		return ret;

	do {
		len = read(fd, buf, sizeof(buf));
	} while (len > 0);

	if (len == 0)
		ret = KSFT_PASS;

	close(fd);
	return ret;
}

static void *pthread_wait_fn(void *arg)
{
	sleep(100);
	return NULL;
}

static int spawn_1000_threads(const char *cgroup, void *arg)
{
	int nr_threads = 1000;
	pthread_t *tinfo;
	unsigned long i;
	long stack;
	int ret = -1;

	tinfo = calloc(nr_threads, sizeof(pthread_t));
	if (tinfo == NULL)
		return -1;

	for (i = 0; i < nr_threads; i++) {
		if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
				   (void *)i)) {
			free(tinfo);
			return(-1);
		}
	}

	stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
	if (stack >= 4096 * 1000)
		ret = 0;

	free(tinfo);
	return ret;
}

/*
 * The test spawns a process, which spawns 1000 threads. Then it checks
 * that memory.stat's kernel_stack is at least 1000 pages large.
 */
static int test_kmem_kernel_stacks(const char *root)
{
	int ret = KSFT_FAIL;
	char *cg = NULL;

	cg = cg_name(root, "kmem_kernel_stacks_test");
	if (!cg)
		goto cleanup;

	if (cg_create(cg))
		goto cleanup;

	if (cg_run(cg, spawn_1000_threads, NULL))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(cg);
	free(cg);

	return ret;
}

/*
 * This test sequentially creates 30 child cgroups, allocates some
 * kernel memory in each of them, and deletes them. Then it checks
 * that the number of dying cgroups on the parent level is 0.
 */
static int test_kmem_dead_cgroups(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent;
	long dead;
	int i;

	parent = cg_name(root, "kmem_dead_cgroups_test");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
		goto cleanup;

	for (i = 0; i < 5; i++) {
		dead = cg_read_key_long(parent, "cgroup.stat",
					"nr_dying_descendants ");
		if (dead == 0) {
			ret = KSFT_PASS;
			break;
		}
		/*
		 * Reclaiming cgroups might take some time,
		 * let's wait a bit and repeat.
		 */
		sleep(1);
	}

cleanup:
	cg_destroy(parent);
	free(parent);

	return ret;
}

#define T(x) { x, #x }
struct kmem_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_kmem_basic),
	T(test_kmem_memcg_deletion),
	T(test_kmem_proc_kpagecgroup),
	T(test_kmem_kernel_stacks),
	T(test_kmem_dead_cgroups),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}
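
The slab growth that test_kmem_basic measures comes entirely from failed path lookups. As a minimal standalone sketch of the same negative-dentry trick (the path prefix and iteration count here are arbitrary, not taken from the test above):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat st;
	char buf[128];
	unsigned long i;

	/*
	 * Each stat() of a non-existent path fails with ENOENT but can
	 * leave a negative dentry in the dcache; long unique names make
	 * the per-entry slab footprint easy to observe.
	 */
	for (i = 0; i < 100000; i++) {
		snprintf(buf, sizeof(buf), "/no-such-file-%lu-%d",
			 i, (int)getpid());
		stat(buf, &st);
	}

	puts("done; compare the \"slab\" key of memory.stat before/after");
	return 0;
}

Run inside a memory cgroup, the "slab" key of memory.stat grows roughly as the test expects, and shrinks again once memory.high forces reclaim.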

@@ -0,0 +1,2 @@
# SPDX-License-Identifier: GPL-2.0+
mincore_selftest

@@ -0,0 +1,6 @@
# SPDX-License-Identifier: GPL-2.0+

CFLAGS += -Wall

TEST_GEN_PROGS := mincore_selftest
include ../lib.mk

@@ -0,0 +1,361 @@
// SPDX-License-Identifier: GPL-2.0+
/*
 * kselftest suite for mincore().
 *
 * Copyright (C) 2020 Collabora, Ltd.
 */

#define _GNU_SOURCE

#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <string.h>
#include <fcntl.h>
#include <string.h>

#include "../kselftest.h"
#include "../kselftest_harness.h"

/* Default test file size: 4MB */
#define MB (1UL << 20)
#define FILE_SIZE (4 * MB)

/*
 * Tests the user interface. This test triggers most of the documented
 * error conditions in mincore().
 */
TEST(basic_interface)
{
	int retval;
	int page_size;
	unsigned char vec[1];
	char *addr;

	page_size = sysconf(_SC_PAGESIZE);

	/* Query a 0 byte sized range */
	retval = mincore(0, 0, vec);
	EXPECT_EQ(0, retval);

	/* Addresses in the specified range are invalid or unmapped */
	errno = 0;
	retval = mincore(NULL, page_size, vec);
	EXPECT_EQ(-1, retval);
	EXPECT_EQ(ENOMEM, errno);

	errno = 0;
	addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
		    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	ASSERT_NE(MAP_FAILED, addr) {
		TH_LOG("mmap error: %s", strerror(errno));
	}

	/* <addr> argument is not page-aligned */
	errno = 0;
	retval = mincore(addr + 1, page_size, vec);
	EXPECT_EQ(-1, retval);
	EXPECT_EQ(EINVAL, errno);

	/* <length> argument is too large */
	errno = 0;
	retval = mincore(addr, -1, vec);
	EXPECT_EQ(-1, retval);
	EXPECT_EQ(ENOMEM, errno);

	/* <vec> argument points to an illegal address */
	errno = 0;
	retval = mincore(addr, page_size, NULL);
	EXPECT_EQ(-1, retval);
	EXPECT_EQ(EFAULT, errno);
	munmap(addr, page_size);
}

/*
 * Test mincore() behavior on a private anonymous page mapping.
 * Check that the page is not loaded into memory right after the mapping
 * but after accessing it (on-demand allocation).
 * Then free the page and check that it's not memory-resident.
 */
TEST(check_anonymous_locked_pages)
{
	unsigned char vec[1];
	char *addr;
	int retval;
	int page_size;

	page_size = sysconf(_SC_PAGESIZE);

	/* Map one page and check it's not memory-resident */
	errno = 0;
	addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	ASSERT_NE(MAP_FAILED, addr) {
		TH_LOG("mmap error: %s", strerror(errno));
	}
	retval = mincore(addr, page_size, vec);
	ASSERT_EQ(0, retval);
	ASSERT_EQ(0, vec[0]) {
		TH_LOG("Page found in memory before use");
	}

	/* Touch the page and check again. It should now be in memory */
	addr[0] = 1;
	mlock(addr, page_size);
	retval = mincore(addr, page_size, vec);
	ASSERT_EQ(0, retval);
	ASSERT_EQ(1, vec[0]) {
		TH_LOG("Page not found in memory after use");
	}

	/*
	 * It shouldn't be memory-resident after unlocking it and
	 * marking it as unneeded.
	 */
	munlock(addr, page_size);
	madvise(addr, page_size, MADV_DONTNEED);
	retval = mincore(addr, page_size, vec);
	ASSERT_EQ(0, retval);
	ASSERT_EQ(0, vec[0]) {
		TH_LOG("Page in memory after being zapped");
	}
	munmap(addr, page_size);
}

/*
 * Check mincore() behavior on huge pages.
 * This test will be skipped if the mapping fails (i.e. if there are no
 * huge pages available).
 *
 * Make sure the system has at least one free huge page, check
 * "HugePages_Free" in /proc/meminfo.
 * Increment /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages if
 * needed.
 */
TEST(check_huge_pages)
{
	unsigned char vec[1];
	char *addr;
	int retval;
	int page_size;

	page_size = sysconf(_SC_PAGESIZE);

	errno = 0;
	addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
		    -1, 0);
	if (addr == MAP_FAILED) {
		if (errno == ENOMEM)
			SKIP(return, "No huge pages available.");
		else
			TH_LOG("mmap error: %s", strerror(errno));
	}
	retval = mincore(addr, page_size, vec);
	ASSERT_EQ(0, retval);
	ASSERT_EQ(0, vec[0]) {
		TH_LOG("Page found in memory before use");
	}

	addr[0] = 1;
	mlock(addr, page_size);
	retval = mincore(addr, page_size, vec);
	ASSERT_EQ(0, retval);
	ASSERT_EQ(1, vec[0]) {
		TH_LOG("Page not found in memory after use");
	}

	munlock(addr, page_size);
	munmap(addr, page_size);
}

/*
 * Test mincore() behavior on a file-backed page.
 * No pages should be loaded into memory right after the mapping. Then,
 * accessing any address in the mapping range should load the page
 * containing the address and a number of subsequent pages (readahead).
 *
 * The actual readahead settings depend on the test environment, so we
 * can't make a lot of assumptions about that. This test covers the most
 * general cases.
 */
TEST(check_file_mmap)
{
	unsigned char *vec;
	int vec_size;
	char *addr;
	int retval;
	int page_size;
	int fd;
	int i;
	int ra_pages = 0;

	page_size = sysconf(_SC_PAGESIZE);
	vec_size = FILE_SIZE / page_size;
	if (FILE_SIZE % page_size)
		vec_size++;

	vec = calloc(vec_size, sizeof(unsigned char));
	ASSERT_NE(NULL, vec) {
		TH_LOG("Can't allocate array");
	}

	errno = 0;
	fd = open(".", O_TMPFILE | O_RDWR, 0600);
	ASSERT_NE(-1, fd) {
		TH_LOG("Can't create temporary file: %s",
		       strerror(errno));
	}
	errno = 0;
	retval = fallocate(fd, 0, 0, FILE_SIZE);
	ASSERT_EQ(0, retval) {
		TH_LOG("Error allocating space for the temporary file: %s",
		       strerror(errno));
	}

	/*
	 * Map the whole file, the pages shouldn't be fetched yet.
	 */
	errno = 0;
	addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, 0);
	ASSERT_NE(MAP_FAILED, addr) {
		TH_LOG("mmap error: %s", strerror(errno));
	}
	retval = mincore(addr, FILE_SIZE, vec);
	ASSERT_EQ(0, retval);
	for (i = 0; i < vec_size; i++) {
		ASSERT_EQ(0, vec[i]) {
			TH_LOG("Unexpected page in memory");
		}
	}

	/*
	 * Touch a page in the middle of the mapping. We expect the next
	 * few pages (the readahead window) to be populated too.
	 */
	addr[FILE_SIZE / 2] = 1;
	retval = mincore(addr, FILE_SIZE, vec);
	ASSERT_EQ(0, retval);
	ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
		TH_LOG("Page not found in memory after use");
	}

	i = FILE_SIZE / 2 / page_size + 1;
	while (i < vec_size && vec[i]) {
		ra_pages++;
		i++;
	}
	EXPECT_GT(ra_pages, 0) {
		TH_LOG("No read-ahead pages found in memory");
	}
	EXPECT_LT(i, vec_size) {
		TH_LOG("Read-ahead pages reached the end of the file");
	}

	/*
	 * End of the readahead window. The rest of the pages shouldn't
	 * be in memory.
	 */
	if (i < vec_size) {
		while (i < vec_size && !vec[i])
			i++;
		EXPECT_EQ(vec_size, i) {
			TH_LOG("Unexpected page in memory beyond readahead window");
		}
	}

	munmap(addr, FILE_SIZE);
	close(fd);
	free(vec);
}

/*
 * Test mincore() behavior on a page backed by a tmpfs file. This test
 * performs the same steps as the previous one. However, we don't expect
 * any readahead in this case.
 */
TEST(check_tmpfs_mmap)
{
	unsigned char *vec;
	int vec_size;
	char *addr;
	int retval;
	int page_size;
	int fd;
	int i;
	int ra_pages = 0;

	page_size = sysconf(_SC_PAGESIZE);
	vec_size = FILE_SIZE / page_size;
	if (FILE_SIZE % page_size)
		vec_size++;

	vec = calloc(vec_size, sizeof(unsigned char));
	ASSERT_NE(NULL, vec) {
		TH_LOG("Can't allocate array");
	}

	errno = 0;
	fd = open("/dev/shm", O_TMPFILE | O_RDWR, 0600);
	ASSERT_NE(-1, fd) {
		TH_LOG("Can't create temporary file: %s",
		       strerror(errno));
	}
	errno = 0;
	retval = fallocate(fd, 0, 0, FILE_SIZE);
	ASSERT_EQ(0, retval) {
		TH_LOG("Error allocating space for the temporary file: %s",
		       strerror(errno));
	}

	/*
	 * Map the whole file, the pages shouldn't be fetched yet.
	 */
	errno = 0;
	addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, 0);
	ASSERT_NE(MAP_FAILED, addr) {
		TH_LOG("mmap error: %s", strerror(errno));
	}
	retval = mincore(addr, FILE_SIZE, vec);
	ASSERT_EQ(0, retval);
	for (i = 0; i < vec_size; i++) {
		ASSERT_EQ(0, vec[i]) {
			TH_LOG("Unexpected page in memory");
		}
	}

	/*
	 * Touch a page in the middle of the mapping. We expect only
	 * that page to be fetched into memory.
	 */
	addr[FILE_SIZE / 2] = 1;
	retval = mincore(addr, FILE_SIZE, vec);
	ASSERT_EQ(0, retval);
	ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
		TH_LOG("Page not found in memory after use");
	}

	i = FILE_SIZE / 2 / page_size + 1;
	while (i < vec_size && vec[i]) {
		ra_pages++;
		i++;
	}
	ASSERT_EQ(ra_pages, 0) {
		TH_LOG("Read-ahead pages found in memory");
	}

	munmap(addr, FILE_SIZE);
	close(fd);
	free(vec);
}

TEST_HARNESS_MAIN
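
For reference, the pattern all five tests build on can be reduced to a few lines. A minimal sketch of querying residency with mincore(), independent of the kselftest harness (nothing below is taken from the file above):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	unsigned char vec[1];
	char *addr;

	addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED)
		return 1;

	/*
	 * Anonymous memory is allocated on first touch, so the page
	 * should not be resident yet. Only bit 0 of each vec entry is
	 * meaningful.
	 */
	mincore(addr, page_size, vec);
	printf("before touch: %s\n", (vec[0] & 1) ? "resident" : "not resident");

	addr[0] = 1;	/* fault the page in */

	mincore(addr, page_size, vec);
	printf("after touch:  %s\n", (vec[0] & 1) ? "resident" : "not resident");

	munmap(addr, page_size);
	return 0;
}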