Merge d5660df4a5 ("Merge branch 'akpm' (patches from Andrew)") into android-mainline

steps on the way to 5.10-rc1

Change-Id: Iddc84c25b6a9d71fa8542b927d6f69c364131c3d
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Greg Kroah-Hartman
2020-10-23 13:08:50 +02:00
261 changed files with 9144 additions and 4313 deletions


@@ -203,11 +203,13 @@ ForEachMacros:
- 'for_each_matching_node'
- 'for_each_matching_node_and_match'
- 'for_each_member'
- 'for_each_memblock'
- 'for_each_mem_region'
- 'for_each_memblock_type'
- 'for_each_memcg_cache_index'
- 'for_each_mem_pfn_range'
- '__for_each_mem_range'
- 'for_each_mem_range'
- '__for_each_mem_range_rev'
- 'for_each_mem_range_rev'
- 'for_each_migratetype_order'
- 'for_each_msi_entry'
@@ -271,6 +273,7 @@ ForEachMacros:
- 'for_each_registered_fb'
- 'for_each_requested_gpio'
- 'for_each_requested_gpio_in_range'
- 'for_each_reserved_mem_range'
- 'for_each_reserved_mem_region'
- 'for_each_rtd_codec_dais'
- 'for_each_rtd_codec_dais_rollback'


@@ -15,7 +15,7 @@ Description:
actual protection), and Android and Linux distributions have been
explicitly writing a "0" to /sys/fs/selinux/checkreqprot during
initialization for some time. Support for setting checkreqprot to 1
will be removed in a future kernel release, at which point the kernel
will be removed no sooner than June 2021, at which point the kernel
will always cease using checkreqprot internally and will always
check the actual protections being applied upon mmap/mprotect calls.
The checkreqprot selinuxfs node will remain for backward compatibility


@@ -1259,6 +1259,10 @@ PAGE_SIZE multiple when read back.
can show up in the middle. Don't rely on items remaining in a
fixed position; use the keys to look up specific values!
If the entry has no per-node counter (or does not show in
memory.numa_stat), we use 'npn' (non-per-node) as the tag
to indicate that it will not show in memory.numa_stat.
anon
Amount of memory used in anonymous mappings such as
brk(), sbrk(), and mmap(MAP_ANONYMOUS)
@@ -1270,15 +1274,11 @@ PAGE_SIZE multiple when read back.
kernel_stack
Amount of memory allocated to kernel stacks.
slab
Amount of memory used for storing in-kernel data
structures.
percpu
percpu(npn)
Amount of memory used for storing per-cpu kernel
data structures.
sock
sock(npn)
Amount of memory used in network transmission buffers
shmem
@@ -1318,11 +1318,9 @@ PAGE_SIZE multiple when read back.
Part of "slab" that cannot be reclaimed on memory
pressure.
pgfault
Total number of page faults incurred
pgmajfault
Number of major page faults incurred
slab(npn)
Amount of memory used for storing in-kernel data
structures.
workingset_refault_anon
Number of refaults of previously evicted anonymous pages.
@@ -1348,37 +1346,68 @@ PAGE_SIZE multiple when read back.
workingset_nodereclaim
Number of times a shadow node has been reclaimed
pgrefill
pgfault(npn)
Total number of page faults incurred
pgmajfault(npn)
Number of major page faults incurred
pgrefill(npn)
Amount of scanned pages (in an active LRU list)
pgscan
pgscan(npn)
Amount of scanned pages (in an inactive LRU list)
pgsteal
pgsteal(npn)
Amount of reclaimed pages
pgactivate
pgactivate(npn)
Amount of pages moved to the active LRU list
pgdeactivate
pgdeactivate(npn)
Amount of pages moved to the inactive LRU list
pglazyfree
pglazyfree(npn)
Amount of pages postponed to be freed under memory pressure
pglazyfreed
pglazyfreed(npn)
Amount of reclaimed lazyfree pages
thp_fault_alloc
thp_fault_alloc(npn)
Number of transparent hugepages which were allocated to satisfy
a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
is not set.
thp_collapse_alloc
thp_collapse_alloc(npn)
Number of transparent hugepages which were allocated to allow
collapsing an existing range of pages. This counter is not
present when CONFIG_TRANSPARENT_HUGEPAGE is not set.
memory.numa_stat
A read-only nested-keyed file which exists on non-root cgroups.
This breaks down the cgroup's memory footprint into different
types of memory, type-specific details, and other information
per node on the state of the memory management system.
This is useful for providing visibility into the NUMA locality
information within a memcg since the pages are allowed to be
allocated from any physical node. One of the use cases is evaluating
application performance by combining this information with the
application's CPU allocation.
All memory amounts are in bytes.
The output format of memory.numa_stat is::
type N0=<bytes in node 0> N1=<bytes in node 1> ...
The entries are ordered to be human readable, and new entries
can show up in the middle. Don't rely on items remaining in a
fixed position; use the keys to look up specific values!
The entries can refer to the memory.stat.
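As a rough illustration of consuming this format, a user-space reader might parse each "type N0=<bytes> N1=<bytes> ..." line as in the following minimal sketch (not part of this patch; the cgroup path is a made-up example):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/fs/cgroup/example/memory.numa_stat", "r");
	char line[4096];

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		char type[128];
		unsigned long long bytes;
		int node, off, consumed;

		/* the first token is the entry name, e.g. "anon" */
		if (sscanf(line, "%127s%n", type, &off) != 1)
			continue;
		/* then one "N<node>=<bytes>" pair per node */
		while (sscanf(line + off, " N%d=%llu%n",
			      &node, &bytes, &consumed) == 2) {
			printf("%s node%d %llu bytes\n", type, node, bytes);
			off += consumed;
		}
	}
	fclose(f);
	return 0;
}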
memory.swap.current
A read-only single value file which exists on non-root
cgroups.


@@ -170,57 +170,82 @@ document trapinfo
address the kernel panicked.
end
define dump_log_idx
set $idx = $arg0
if ($argc > 1)
set $prev_flags = $arg1
define dump_record
set var $desc = $arg0
set var $info = $arg1
if ($argc > 2)
set var $prev_flags = $arg2
else
set $prev_flags = 0
end
set $msg = ((struct printk_log *) (log_buf + $idx))
set $prefix = 1
set $newline = 1
set $log = log_buf + $idx + sizeof(*$msg)
# prev & LOG_CONT && !(msg->flags & LOG_PREIX)
if (($prev_flags & 8) && !($msg->flags & 4))
set $prefix = 0
set var $prev_flags = 0
end
# msg->flags & LOG_CONT
if ($msg->flags & 8)
set var $prefix = 1
set var $newline = 1
set var $begin = $desc->text_blk_lpos.begin % (1U << prb->text_data_ring.size_bits)
set var $next = $desc->text_blk_lpos.next % (1U << prb->text_data_ring.size_bits)
# handle data-less record
if ($begin & 1)
set var $text_len = 0
set var $log = ""
else
# handle wrapping data block
if ($begin > $next)
set var $begin = 0
end
# skip over descriptor id
set var $begin = $begin + sizeof(long)
# handle truncated message
if ($next - $begin < $info->text_len)
set var $text_len = $next - $begin
else
set var $text_len = $info->text_len
end
set var $log = &prb->text_data_ring.data[$begin]
end
# prev & LOG_CONT && !(info->flags & LOG_PREFIX)
if (($prev_flags & 8) && !($info->flags & 4))
set var $prefix = 0
end
# info->flags & LOG_CONT
if ($info->flags & 8)
# (prev & LOG_CONT && !(prev & LOG_NEWLINE))
if (($prev_flags & 8) && !($prev_flags & 2))
set $prefix = 0
set var $prefix = 0
end
# (!(msg->flags & LOG_NEWLINE))
if (!($msg->flags & 2))
set $newline = 0
# (!(info->flags & LOG_NEWLINE))
if (!($info->flags & 2))
set var $newline = 0
end
end
if ($prefix)
printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000
printf "[%5lu.%06lu] ", $info->ts_nsec / 1000000000, $info->ts_nsec % 1000000000
end
if ($msg->text_len != 0)
eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len
if ($text_len)
eval "printf \"%%%d.%ds\", $log", $text_len, $text_len
end
if ($newline)
printf "\n"
end
if ($msg->dict_len > 0)
set $dict = $log + $msg->text_len
set $idx = 0
set $line = 1
while ($idx < $msg->dict_len)
if ($line)
printf " "
set $line = 0
end
set $c = $dict[$idx]
# handle dictionary data
set var $dict = &$info->dev_info.subsystem[0]
set var $dict_len = sizeof($info->dev_info.subsystem)
if ($dict[0] != '\0')
printf " SUBSYSTEM="
set var $idx = 0
while ($idx < $dict_len)
set var $c = $dict[$idx]
if ($c == '\0')
printf "\n"
set $line = 1
loop_break
else
if ($c < ' ' || $c >= 127 || $c == '\\')
printf "\\x%02x", $c
@@ -228,33 +253,67 @@ define dump_log_idx
printf "%c", $c
end
end
set $idx = $idx + 1
set var $idx = $idx + 1
end
printf "\n"
end
set var $dict = &$info->dev_info.device[0]
set var $dict_len = sizeof($info->dev_info.device)
if ($dict[0] != '\0')
printf " DEVICE="
set var $idx = 0
while ($idx < $dict_len)
set var $c = $dict[$idx]
if ($c == '\0')
loop_break
else
if ($c < ' ' || $c >= 127 || $c == '\\')
printf "\\x%02x", $c
else
printf "%c", $c
end
end
set var $idx = $idx + 1
end
printf "\n"
end
end
document dump_log_idx
Dump a single log given its index in the log buffer. The first
parameter is the index into log_buf, the second is optional and
specified the previous log buffer's flags, used for properly
formatting continued lines.
document dump_record
Dump a single record. The first parameter is the descriptor,
the second parameter is the info, the third parameter is
optional and specifies the previous record's flags, used for
properly formatting continued lines.
end
define dmesg
set $i = log_first_idx
set $end_idx = log_first_idx
set $prev_flags = 0
# definitions from kernel/printk/printk_ringbuffer.h
set var $desc_committed = 1
set var $desc_finalized = 2
set var $desc_sv_bits = sizeof(long) * 8
set var $desc_flags_shift = $desc_sv_bits - 2
set var $desc_flags_mask = 3 << $desc_flags_shift
set var $id_mask = ~$desc_flags_mask
set var $desc_count = 1U << prb->desc_ring.count_bits
set var $prev_flags = 0
set var $id = prb->desc_ring.tail_id.counter
set var $end_id = prb->desc_ring.head_id.counter
while (1)
set $msg = ((struct printk_log *) (log_buf + $i))
if ($msg->len == 0)
set $i = 0
else
dump_log_idx $i $prev_flags
set $i = $i + $msg->len
set $prev_flags = $msg->flags
set var $desc = &prb->desc_ring.descs[$id % $desc_count]
set var $info = &prb->desc_ring.infos[$id % $desc_count]
# skip non-committed record
set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift)
if ($state == $desc_committed || $state == $desc_finalized)
dump_record $desc $info $prev_flags
set var $prev_flags = $info->flags
end
if ($i == $end_idx)
set var $id = ($id + 1) & $id_mask
if ($id == $end_id)
loop_break
end
end
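For reference, the descriptor state/id arithmetic the macro above performs can also be written as a small standalone C program; the constants mirror the kernel/printk/printk_ringbuffer.h definitions quoted in the macro, and the sample state_var value is invented purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned long desc_sv_bits = sizeof(long) * 8;
	unsigned long desc_flags_shift = desc_sv_bits - 2;
	unsigned long desc_flags_mask = 3UL << desc_flags_shift;
	unsigned long id_mask = ~desc_flags_mask;

	/* invented sample: state "finalized" (2) in the top bits, id 42 below */
	unsigned long state_var = (2UL << desc_flags_shift) | 42;

	unsigned long state = 3 & (state_var >> desc_flags_shift);
	unsigned long id = state_var & id_mask;

	printf("state=%lu (1=committed, 2=finalized), id=%lu\n", state, id);
	return 0;
}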


@@ -189,50 +189,123 @@ from this.
Free areas descriptor. User-space tools use this value to iterate the
free_area ranges. MAX_ORDER is used by the zone buddy allocator.
log_first_idx
-------------
prb
---
Index of the first record stored in the buffer log_buf. Used by
user-space tools to read the strings in the log_buf.
A pointer to the printk ringbuffer (struct printk_ringbuffer). This
may be pointing to the static boot ringbuffer or the dynamically
allocated ringbuffer, depending on when the core dump occurred.
Used by user-space tools to read the active kernel log buffer.
log_buf
-------
printk_rb_static
----------------
Console output is written to the ring buffer log_buf at index
log_first_idx. Used to get the kernel log.
A pointer to the static boot printk ringbuffer. If @prb has a
different value, this is useful for viewing the initial boot messages,
which may have been overwritten in the dynamically allocated
ringbuffer.
log_buf_len
-----------
log_buf's length.
clear_idx
clear_seq
---------
The index that the next printk() record to read after the last clear
command. It indicates the first record after the last SYSLOG_ACTION
_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump
the dmesg log.
The sequence number of the printk() record after the last clear
command. It indicates the first record after the last
SYSLOG_ACTION_CLEAR, like issued by 'dmesg -c'. Used by user-space
tools to dump a subset of the dmesg log.
log_next_idx
------------
printk_ringbuffer
-----------------
The index of the next record to store in the buffer log_buf. Used to
compute the index of the current buffer position.
The size of a printk_ringbuffer structure. This structure contains all
information required for accessing the various components of the
kernel log buffer.
printk_log
----------
(printk_ringbuffer, desc_ring|text_data_ring|dict_data_ring|fail)
-----------------------------------------------------------------
The size of a structure printk_log. Used to compute the size of
messages, and extract dmesg log. It encapsulates header information for
log_buf, such as timestamp, syslog level, etc.
Offsets for the various components of the printk ringbuffer. Used by
user-space tools to view the kernel log buffer without requiring the
declaration of the structure.
(printk_log, ts_nsec|len|text_len|dict_len)
-------------------------------------------
prb_desc_ring
-------------
It represents field offsets in struct printk_log. User space tools
parse it and check whether the values of printk_log's members have been
changed.
The size of the prb_desc_ring structure. This structure contains
information about the set of record descriptors.
(prb_desc_ring, count_bits|descs|head_id|tail_id)
-------------------------------------------------
Offsets for the fields describing the set of record descriptors. Used
by user-space tools to be able to traverse the descriptors without
requiring the declaration of the structure.
prb_desc
--------
The size of the prb_desc structure. This structure contains
information about a single record descriptor.
(prb_desc, info|state_var|text_blk_lpos|dict_blk_lpos)
------------------------------------------------------
Offsets for the fields describing a record descriptor. Used by
user-space tools to be able to read descriptors without requiring
the declaration of the structure.
prb_data_blk_lpos
-----------------
The size of the prb_data_blk_lpos structure. This structure contains
information about where the text or dictionary data (data block) is
located within the respective data ring.
(prb_data_blk_lpos, begin|next)
-------------------------------
Offsets for the fields describing the location of a data block. Used
by user-space tools to be able to locate data blocks without
requiring the declaration of the structure.
printk_info
-----------
The size of the printk_info structure. This structure contains all
the meta-data for a record.
(printk_info, seq|ts_nsec|text_len|dict_len|caller_id)
------------------------------------------------------
Offsets for the fields providing the meta-data for a record. Used by
user-space tools to be able to read the information without requiring
the declaration of the structure.
prb_data_ring
-------------
The size of the prb_data_ring structure. This structure contains
information about a set of data blocks.
(prb_data_ring, size_bits|data|head_lpos|tail_lpos)
---------------------------------------------------
Offsets for the fields describing a set of data blocks. Used by
user-space tools to be able to access the data blocks without
requiring the declaration of the structure.
atomic_long_t
-------------
The size of the atomic_long_t structure. Used by user-space tools to
be able to copy the full structure, regardless of its
architecture-specific implementation.
(atomic_long_t, counter)
------------------------
Offset for the long value of an atomic_long_t variable. Used by
user-space tools to access the long value without requiring the
architecture-specific declaration.
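As a rough sketch of how a dump tool could combine these exported values, the program below computes where a given record descriptor lives; every numeric value is a placeholder for something a real tool would read from the VMCOREINFO note or from dump memory (the reads themselves are not shown):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* placeholders for values obtained from VMCOREINFO and the dump */
	uint64_t descs_addr    = 0xffff888001000000ULL; /* desc_ring.descs pointer value */
	uint64_t size_prb_desc = 64;                    /* exported size of prb_desc */
	uint32_t count_bits    = 15;                    /* desc_ring.count_bits value */
	uint64_t id            = 12345;                 /* record id being looked up */

	/* same indexing as the ring buffer: descs[id % (1 << count_bits)] */
	uint64_t desc_count = 1ULL << count_bits;
	uint64_t desc_addr = descs_addr + (id % desc_count) * size_prb_desc;

	printf("descriptor for id %llu lives at %#llx\n",
	       (unsigned long long)id, (unsigned long long)desc_addr);
	return 0;
}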
(free_area.free_list, MIGRATE_TYPES)
------------------------------------


@@ -131,7 +131,7 @@ hugepages
parameter is preceded by an invalid hugepagesz parameter, it will
be ignored.
default_hugepagesz
pecify the default huge page size. This parameter can
Specify the default huge page size. This parameter can
only be specified once on the command line. default_hugepagesz can
optionally be followed by the hugepages parameter to preallocate a
specific number of huge pages of default size. The number of default


@@ -13,10 +13,10 @@ KASAN uses compile-time instrumentation to insert validity checks before every
memory access, and therefore requires a compiler version that supports that.
Generic KASAN is supported in both GCC and Clang. With GCC it requires version
8.3.0 or later. With Clang it requires version 7.0.0 or later, but detection of
8.3.0 or later. Any supported Clang version is compatible, but detection of
out-of-bounds accesses for global variables is only supported since Clang 11.
Tag-based KASAN is only supported in Clang and requires version 7.0.0 or later.
Tag-based KASAN is only supported in Clang.
Currently generic KASAN is supported for the x86_64, arm64, xtensa, s390 and
riscv architectures, and tag-based KASAN is supported only for arm64.
@@ -281,3 +281,73 @@ unmapped. This will require changes in arch-specific code.
This allows ``VMAP_STACK`` support on x86, and can simplify support of
architectures that do not have a fixed module region.
CONFIG_KASAN_KUNIT_TEST & CONFIG_TEST_KASAN_MODULE
--------------------------------------------------
``CONFIG_KASAN_KUNIT_TEST`` utilizes the KUnit Test Framework for testing.
This means each test focuses on a small unit of functionality and
there are a few ways these tests can be run.
Each test will print the KASAN report if an error is detected and then
print the number of the test and the status of the test:
pass::
ok 28 - kmalloc_double_kzfree
or, if kmalloc failed::
# kmalloc_large_oob_right: ASSERTION FAILED at lib/test_kasan.c:163
Expected ptr is not null, but is
not ok 4 - kmalloc_large_oob_right
or, if a KASAN report was expected, but not found::
# kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:629
Expected kasan_data->report_expected == kasan_data->report_found, but
kasan_data->report_expected == 1
kasan_data->report_found == 0
not ok 28 - kmalloc_double_kzfree
All test statuses are tracked as they run and an overall status will
be printed at the end::
ok 1 - kasan
or::
not ok 1 - kasan
(1) Loadable Module
~~~~~~~~~~~~~~~~~~~~
With ``CONFIG_KUNIT`` enabled, ``CONFIG_KASAN_KUNIT_TEST`` can be built as
a loadable module and run on any architecture that supports KASAN
using something like insmod or modprobe. The module is called ``test_kasan``.
(2) Built-In
~~~~~~~~~~~~~
With ``CONFIG_KUNIT`` built-in, ``CONFIG_KASAN_KUNIT_TEST`` can be built-in
on any architecture that supports KASAN. These and any other KUnit
tests enabled will run and print the results at boot as a late-init
call.
(3) Using kunit_tool
~~~~~~~~~~~~~~~~~~~~~
With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, we can also
use kunit_tool to see the results of these along with other KUnit
tests in a more readable way. This will not print the KASAN reports
of tests that passed. Use `KUnit documentation <https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html>`_ for more up-to-date
information on kunit_tool.
.. _KUnit: https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html
``CONFIG_TEST_KASAN_MODULE`` is a set of KASAN tests that could not be
converted to KUnit. These tests can be run only as a module with
``CONFIG_TEST_KASAN_MODULE`` built as a loadable module and
``CONFIG_KASAN`` built-in. The type of error expected and the
function being run is printed before the expression expected to give
an error. Then the error is printed, if found, and that test
should be interpreted to pass only if the error was the one expected
by the test.


@@ -229,7 +229,7 @@ Testing with kmemleak-test
To check if you have all set up to use kmemleak, you can use the kmemleak-test
module, a module that deliberately leaks memory. Set CONFIG_DEBUG_KMEMLEAK_TEST
as module (it can't be used as bult-in) and boot the kernel with kmemleak
as module (it can't be used as built-in) and boot the kernel with kmemleak
enabled. Load the module and perform a scan with::
# modprobe kmemleak-test


@@ -21,6 +21,7 @@ This document describes the Linux kernel Makefiles.
--- 3.10 Special Rules
--- 3.11 $(CC) support functions
--- 3.12 $(LD) support functions
--- 3.13 Script Invocation
=== 4 Host Program support
--- 4.1 Simple Host Program
@@ -605,6 +606,25 @@ more details, with real examples.
#Makefile
LDFLAGS_vmlinux += $(call ld-option, -X)
3.13 Script invocation
----------------------
Make rules may invoke scripts to build the kernel. The rules shall
always provide the appropriate interpreter to execute the script. They
shall not rely on the execute bits being set, and shall not invoke the
script directly. For the convenience of manual script invocation, such
as invoking ./scripts/checkpatch.pl, it is recommended to set execute
bits on the scripts nonetheless.
Kbuild provides variables $(CONFIG_SHELL), $(AWK), $(PERL),
$(PYTHON) and $(PYTHON3) to refer to interpreters for the respective
scripts.
Example::
#Makefile
cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
$(KERNELRELEASE)
4 Host Program support
======================


@@ -64,7 +64,7 @@ Active MM
actually get cases where you have a address space that is _only_ used by
lazy users. That is often a short-lived state, because once that thread
gets scheduled away in favour of a real thread, the "zombie" mm gets
released because "mm_users" becomes zero.
released because "mm_count" becomes zero.
Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any
more. "init_mm" should be considered just a "lazy context when no other


@@ -173,6 +173,10 @@ NUMA
numa=noacpi
Don't parse the SRAT table for NUMA setup
numa=nohmat
Don't parse the HMAT table for NUMA setup, or soft-reserved memory
partitioning.
numa=fake=<size>[MG]
If given as a memory unit, fills all system RAM with nodes of
size interleaved over physical nodes.


@@ -9734,8 +9734,8 @@ M: Catalin Marinas <catalin.marinas@arm.com>
S: Maintained
F: Documentation/dev-tools/kmemleak.rst
F: include/linux/kmemleak.h
F: mm/kmemleak-test.c
F: mm/kmemleak.c
F: samples/kmemleak/kmemleak-test.c
KMOD KERNEL MODULE LOADER - USERMODE HELPER
M: Luis Chamberlain <mcgrof@kernel.org>
@@ -13983,6 +13983,7 @@ PRINTK
M: Petr Mladek <pmladek@suse.com>
M: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
R: Steven Rostedt <rostedt@goodmis.org>
R: John Ogness <john.ogness@linutronix.de>
S: Maintained
F: include/linux/printk.h
F: kernel/printk/
@@ -15633,6 +15634,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
F: Documentation/ABI/obsolete/sysfs-selinux-checkreqprot
F: Documentation/ABI/obsolete/sysfs-selinux-disable
F: Documentation/admin-guide/LSM/SELinux.rst
F: include/trace/events/avc.h
F: include/uapi/linux/selinux_netlink.h
F: scripts/selinux/
F: security/selinux/


@@ -926,15 +926,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized)
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
# clang sets -fmerge-all-constants by default as optimization, but this
# is non-conforming behavior for C and in fact breaks the kernel, so we
# need to disable it here generally.
KBUILD_CFLAGS += $(call cc-option,-fno-merge-all-constants)
# for gcc -fno-merge-all-constants disables everything, but it is fine
# to have actual conforming behavior enabled.
KBUILD_CFLAGS += $(call cc-option,-fmerge-constants)
# Make sure -fstack-check isn't enabled (like gentoo apparently did)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)


@@ -450,10 +450,23 @@ config ARCH_WANT_OLD_COMPAT_IPC
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
bool
config HAVE_ARCH_SECCOMP_FILTER
config HAVE_ARCH_SECCOMP
bool
help
An arch should select this symbol to support seccomp mode 1 (the fixed
syscall policy), and must provide an override for __NR_seccomp_sigreturn,
and compat syscalls if the asm-generic/seccomp.h defaults need adjustment:
- __NR_seccomp_read_32
- __NR_seccomp_write_32
- __NR_seccomp_exit_32
- __NR_seccomp_sigreturn_32
config HAVE_ARCH_SECCOMP_FILTER
bool
select HAVE_ARCH_SECCOMP
help
An arch should select this symbol if it provides all of these things:
- all the requirements for HAVE_ARCH_SECCOMP
- syscall_get_arch()
- syscall_get_arguments()
- syscall_rollback()
@@ -464,6 +477,23 @@ config HAVE_ARCH_SECCOMP_FILTER
results in the system call being skipped immediately.
- seccomp syscall wired up
config SECCOMP
prompt "Enable seccomp to safely execute untrusted bytecode"
def_bool y
depends on HAVE_ARCH_SECCOMP
help
This kernel feature is useful for number crunching applications
that may need to handle untrusted bytecode during their
execution. By using pipes or other transports made available
to the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in their
own address space using seccomp. Once seccomp is enabled via
prctl(PR_SET_SECCOMP) or the seccomp() syscall, it cannot be
disabled and the task is only allowed to execute a few safe
syscalls defined by each seccomp mode.
If unsure, say Y.
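A minimal user-space illustration of the mode 1 behaviour this help text describes (an example for this document, not part of the patch): once prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT) succeeds, only read(), write(), _exit() and sigreturn() remain available to the task:

#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <linux/seccomp.h>

int main(void)
{
	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT) != 0) {
		perror("prctl");   /* not sandboxed yet, stdio is still safe */
		return 1;
	}
	/* write() directly to an already-open fd; printf() could trigger
	 * disallowed syscalls (e.g. brk) and get the task killed. */
	write(1, "sandboxed\n", 10);
	_exit(0);
}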
config SECCOMP_FILTER
def_bool y
depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET


@@ -68,6 +68,7 @@ config ARM
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP
select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
@@ -84,7 +85,7 @@ config ARM
select HAVE_FAST_GUP if ARM_LPAE
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
select HAVE_FUNCTION_TRACER if !XIP_KERNEL && (CC_IS_GCC || CLANG_VERSION >= 100000)
select HAVE_FUNCTION_TRACER if !XIP_KERNEL
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
select HAVE_IDE if PCI || ISA || PCMCIA
@@ -1618,20 +1619,6 @@ config UACCESS_WITH_MEMCPY
However, if the CPU data cache is using a write-allocate mode,
this option is unlikely to provide any performance gain.
config SECCOMP
bool
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config PARAVIRT
bool "Enable paravirtualization code"
help


@@ -59,6 +59,7 @@ __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
#ifdef CONFIG_ARM_LPAE
struct page *page = virt_to_page(pmdp);
pgtable_pmd_page_dtor(page);
tlb_remove_table(tlb, page);
#endif
}


@@ -843,19 +843,25 @@ early_param("mem", early_mem);
static void __init request_standard_resources(const struct machine_desc *mdesc)
{
struct memblock_region *region;
phys_addr_t start, end, res_end;
struct resource *res;
u64 i;
kernel_code.start = virt_to_phys(_text);
kernel_code.end = virt_to_phys(__init_begin - 1);
kernel_data.start = virt_to_phys(_sdata);
kernel_data.end = virt_to_phys(_end - 1);
for_each_memblock(memory, region) {
phys_addr_t start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
phys_addr_t end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
for_each_mem_range(i, &start, &end) {
unsigned long boot_alias_start;
/*
* In memblock, end points to the first byte after the
range while in resources, end points to the last byte in
* the range.
*/
res_end = end - 1;
/*
* Some systems have a special memory alias which is only
* used for booting. We need to advertise this region to
@@ -869,7 +875,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
__func__, sizeof(*res));
res->name = "System RAM (boot alias)";
res->start = boot_alias_start;
res->end = phys_to_idmap(end);
res->end = phys_to_idmap(res_end);
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
}
@@ -880,7 +886,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
sizeof(*res));
res->name = "System RAM";
res->start = start;
res->end = end;
res->end = res_end;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);


@@ -299,16 +299,14 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
*/
static void __init free_unused_memmap(void)
{
unsigned long start, prev_end = 0;
struct memblock_region *reg;
unsigned long start, end, prev_end = 0;
int i;
/*
* This relies on each bank being in address order.
* The banks are sorted previously in bootmem_init().
*/
for_each_memblock(memory, reg) {
start = memblock_region_memory_base_pfn(reg);
for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
#ifdef CONFIG_SPARSEMEM
/*
* Take care not to free memmap entries that don't exist
@@ -336,8 +334,7 @@ static void __init free_unused_memmap(void)
* memmap entries are valid from the bank end aligned to
* MAX_ORDER_NR_PAGES.
*/
prev_end = ALIGN(memblock_region_memory_end_pfn(reg),
MAX_ORDER_NR_PAGES);
prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
}
#ifdef CONFIG_SPARSEMEM
@@ -347,61 +344,29 @@ static void __init free_unused_memmap(void)
#endif
}
#ifdef CONFIG_HIGHMEM
static inline void free_area_high(unsigned long pfn, unsigned long end)
{
for (; pfn < end; pfn++)
free_highmem_page(pfn_to_page(pfn));
}
#endif
static void __init free_highpages(void)
{
#ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn;
struct memblock_region *mem, *res;
phys_addr_t range_start, range_end;
u64 i;
/* set highmem page free */
for_each_memblock(memory, mem) {
unsigned long start = memblock_region_memory_base_pfn(mem);
unsigned long end = memblock_region_memory_end_pfn(mem);
for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
&range_start, &range_end, NULL) {
unsigned long start = PHYS_PFN(range_start);
unsigned long end = PHYS_PFN(range_end);
/* Ignore complete lowmem entries */
if (end <= max_low)
continue;
if (memblock_is_nomap(mem))
continue;
/* Truncate partial highmem entries */
if (start < max_low)
start = max_low;
/* Find and exclude any reserved regions */
for_each_memblock(reserved, res) {
unsigned long res_start, res_end;
res_start = memblock_region_reserved_base_pfn(res);
res_end = memblock_region_reserved_end_pfn(res);
if (res_end < start)
continue;
if (res_start < start)
res_start = start;
if (res_start > end)
res_start = end;
if (res_end > end)
res_end = end;
if (res_start != start)
free_area_high(start, res_start);
start = res_end;
if (start == end)
break;
}
/* And now free anything which remains */
if (start < end)
free_area_high(start, end);
for (; start < end; start++)
free_highmem_page(pfn_to_page(start));
}
#endif
}


@@ -1154,9 +1154,8 @@ phys_addr_t arm_lowmem_limit __initdata = 0;
void __init adjust_lowmem_bounds(void)
{
phys_addr_t memblock_limit = 0;
u64 vmalloc_limit;
struct memblock_region *reg;
phys_addr_t block_start, block_end, memblock_limit = 0;
u64 vmalloc_limit, i;
phys_addr_t lowmem_limit = 0;
/*
@@ -1172,26 +1171,18 @@ void __init adjust_lowmem_bounds(void)
* The first usable region must be PMD aligned. Mark its start
* as MEMBLOCK_NOMAP if it isn't
*/
for_each_memblock(memory, reg) {
if (!memblock_is_nomap(reg)) {
if (!IS_ALIGNED(reg->base, PMD_SIZE)) {
for_each_mem_range(i, &block_start, &block_end) {
if (!IS_ALIGNED(block_start, PMD_SIZE)) {
phys_addr_t len;
len = round_up(reg->base, PMD_SIZE) - reg->base;
memblock_mark_nomap(reg->base, len);
len = round_up(block_start, PMD_SIZE) - block_start;
memblock_mark_nomap(block_start, len);
}
break;
}
}
for_each_memblock(memory, reg) {
phys_addr_t block_start = reg->base;
phys_addr_t block_end = reg->base + reg->size;
if (memblock_is_nomap(reg))
continue;
if (reg->base < vmalloc_limit) {
for_each_mem_range(i, &block_start, &block_end) {
if (block_start < vmalloc_limit) {
if (block_end > lowmem_limit)
/*
* Compare as u64 to ensure vmalloc_limit does
@@ -1440,19 +1431,15 @@ static void __init kmap_init(void)
static void __init map_lowmem(void)
{
struct memblock_region *reg;
phys_addr_t kernel_x_start = round_down(__pa(KERNEL_START), SECTION_SIZE);
phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
phys_addr_t start, end;
u64 i;
/* Map all the lowmem memory banks. */
for_each_memblock(memory, reg) {
phys_addr_t start = reg->base;
phys_addr_t end = start + reg->size;
for_each_mem_range(i, &start, &end) {
struct map_desc map;
if (memblock_is_nomap(reg))
continue;
if (end > arm_lowmem_limit)
end = arm_lowmem_limit;
if (start >= end)


@@ -231,12 +231,12 @@ static int __init allocate_region(phys_addr_t base, phys_addr_t size,
void __init pmsav7_adjust_lowmem_bounds(void)
{
phys_addr_t specified_mem_size = 0, total_mem_size = 0;
struct memblock_region *reg;
bool first = true;
phys_addr_t mem_start;
phys_addr_t mem_end;
phys_addr_t reg_start, reg_end;
unsigned int mem_max_regions;
int num, i;
int num;
u64 i;
/* Free-up PMSAv7_PROBE_REGION */
mpu_min_region_order = __mpu_min_region_order();
@@ -262,20 +262,19 @@ void __init pmsav7_adjust_lowmem_bounds(void)
mem_max_regions -= num;
#endif
for_each_memblock(memory, reg) {
if (first) {
for_each_mem_range(i, &reg_start, &reg_end) {
if (i == 0) {
phys_addr_t phys_offset = PHYS_OFFSET;
/*
* Initially only use memory continuous from
* PHYS_OFFSET */
if (reg->base != phys_offset)
if (reg_start != phys_offset)
panic("First memory bank must be contiguous from PHYS_OFFSET");
mem_start = reg->base;
mem_end = reg->base + reg->size;
specified_mem_size = reg->size;
first = false;
mem_start = reg_start;
mem_end = reg_end;
specified_mem_size = mem_end - mem_start;
} else {
/*
* memblock auto merges contiguous blocks, remove
@@ -283,8 +282,8 @@ void __init pmsav7_adjust_lowmem_bounds(void)
* blocks separately while iterating)
*/
pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
&mem_end, &reg->base);
memblock_remove(reg->base, 0 - reg->base);
&mem_end, &reg_start);
memblock_remove(reg_start, 0 - reg_start);
break;
}
}


@@ -94,20 +94,19 @@ static __init bool is_region_fixed(int number)
void __init pmsav8_adjust_lowmem_bounds(void)
{
phys_addr_t mem_end;
struct memblock_region *reg;
bool first = true;
phys_addr_t reg_start, reg_end;
u64 i;
for_each_memblock(memory, reg) {
if (first) {
for_each_mem_range(i, &reg_start, &reg_end) {
if (i == 0) {
phys_addr_t phys_offset = PHYS_OFFSET;
/*
* Initially only use memory continuous from
* PHYS_OFFSET */
if (reg->base != phys_offset)
if (reg_start != phys_offset)
panic("First memory bank must be contiguous from PHYS_OFFSET");
mem_end = reg->base + reg->size;
first = false;
mem_end = reg_end;
} else {
/*
* memblock auto merges contiguous blocks, remove
@@ -115,8 +114,8 @@ void __init pmsav8_adjust_lowmem_bounds(void)
* blocks separately while iterating)
*/
pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
&mem_end, &reg->base);
memblock_remove(reg->base, 0 - reg->base);
&mem_end, &reg_start);
memblock_remove(reg_start, 0 - reg_start);
break;
}
}


@@ -25,11 +25,12 @@
unsigned long xen_get_swiotlb_free_pages(unsigned int order)
{
struct memblock_region *reg;
phys_addr_t base;
gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
u64 i;
for_each_memblock(memory, reg) {
if (reg->base < (phys_addr_t)0xffffffff) {
for_each_mem_range(i, &base, NULL) {
if (base < (phys_addr_t)0xffffffff) {
if (IS_ENABLED(CONFIG_ZONE_DMA32))
flags |= __GFP_DMA32;
else


@@ -1041,19 +1041,6 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
config CC_HAVE_SHADOW_CALL_STACK
def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config PARAVIRT
bool "Enable paravirtualization code"
help
@@ -1612,8 +1599,6 @@ config ARM64_BTI_KERNEL
depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697
depends on !CC_IS_GCC || GCC_VERSION >= 100100
# https://reviews.llvm.org/rGb8ae3fdfa579dbf366b1bb1cbfdbf8c51db7fa55
depends on !CC_IS_CLANG || CLANG_VERSION >= 100001
depends on !(CC_IS_CLANG && GCOV_KERNEL)
depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
help


@@ -215,8 +215,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
phys_addr_t start, end;
nr_ranges = 1; /* for exclusion of crashkernel region */
for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
MEMBLOCK_NONE, &start, &end, NULL)
for_each_mem_range(i, &start, &end)
nr_ranges++;
cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
@@ -225,8 +224,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
cmem->max_nr_ranges = nr_ranges;
cmem->nr_ranges = 0;
for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
MEMBLOCK_NONE, &start, &end, NULL) {
for_each_mem_range(i, &start, &end) {
cmem->ranges[cmem->nr_ranges].start = start;
cmem->ranges[cmem->nr_ranges].end = end - 1;
cmem->nr_ranges++;


@@ -217,7 +217,7 @@ static void __init request_standard_resources(void)
if (!standard_resources)
panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
for_each_memblock(memory, region) {
for_each_mem_region(region) {
res = &standard_resources[i++];
if (memblock_is_nomap(region)) {
res->name = "reserved";
@@ -257,7 +257,7 @@ static int __init reserve_memblock_reserved_regions(void)
if (!memblock_is_region_reserved(mem->start, mem_size))
continue;
for_each_reserved_mem_region(j, &r_start, &r_end) {
for_each_reserved_mem_range(j, &r_start, &r_end) {
resource_size_t start, end;
start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start);


@@ -43,13 +43,6 @@ ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
endif
# Clang versions less than 8 do not support -mcmodel=tiny
ifeq ($(CONFIG_CC_IS_CLANG), y)
ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
endif
endif
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n


@@ -471,12 +471,10 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
*/
static void __init free_unused_memmap(void)
{
unsigned long start, prev_end = 0;
struct memblock_region *reg;
for_each_memblock(memory, reg) {
start = __phys_to_pfn(reg->base);
unsigned long start, end, prev_end = 0;
int i;
for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
#ifdef CONFIG_SPARSEMEM
/*
* Take care not to free memmap entries that don't exist due
@@ -496,8 +494,7 @@ static void __init free_unused_memmap(void)
* memmap entries are valid from the bank end aligned to
* MAX_ORDER_NR_PAGES.
*/
prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size),
MAX_ORDER_NR_PAGES);
prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
}
#ifdef CONFIG_SPARSEMEM


@@ -212,8 +212,8 @@ void __init kasan_init(void)
{
u64 kimg_shadow_start, kimg_shadow_end;
u64 mod_shadow_start, mod_shadow_end;
struct memblock_region *reg;
int i;
phys_addr_t pa_start, pa_end;
u64 i;
kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK;
kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end));
@@ -246,9 +246,9 @@ void __init kasan_init(void)
kasan_populate_early_shadow((void *)mod_shadow_end,
(void *)kimg_shadow_start);
for_each_memblock(memory, reg) {
void *start = (void *)__phys_to_virt(reg->base);
void *end = (void *)__phys_to_virt(reg->base + reg->size);
for_each_mem_range(i, &pa_start, &pa_end) {
void *start = (void *)__phys_to_virt(pa_start);
void *end = (void *)__phys_to_virt(pa_end);
if (start >= end)
break;


@@ -473,8 +473,9 @@ static void __init map_mem(pgd_t *pgdp)
{
phys_addr_t kernel_start = __pa_symbol(_text);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
struct memblock_region *reg;
phys_addr_t start, end;
int flags = 0;
u64 i;
if (rodata_full || debug_pagealloc_enabled())
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
@@ -493,15 +494,9 @@ static void __init map_mem(pgd_t *pgdp)
#endif
/* map all the memory banks */
for_each_memblock(memory, reg) {
phys_addr_t start = reg->base;
phys_addr_t end = start + reg->size;
for_each_mem_range(i, &start, &end) {
if (start >= end)
break;
if (memblock_is_nomap(reg))
continue;
/*
* The linear map must allow allocation tags reading/writing
* if MTE is present. Otherwise, it has the same attributes as


@@ -354,7 +354,7 @@ static int __init numa_register_nodes(void)
struct memblock_region *mblk;
/* Check that valid nid is set to memblks */
for_each_memblock(memory, mblk) {
for_each_mem_region(mblk) {
int mblk_nid = memblock_get_region_node(mblk);
if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
@@ -427,19 +427,16 @@ out_free_distance:
*/
static int __init dummy_numa_init(void)
{
phys_addr_t start = memblock_start_of_DRAM();
phys_addr_t end = memblock_end_of_DRAM();
int ret;
struct memblock_region *mblk;
if (numa_off)
pr_info("NUMA disabled\n"); /* Forced off on command line. */
pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n",
memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1);
for_each_memblock(memory, mblk) {
ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
if (!ret)
continue;
pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1);
ret = numa_add_memblk(0, start, end);
if (ret) {
pr_err("NUMA init failed\n");
return ret;
}


@@ -287,7 +287,8 @@ notrace void __init machine_init(unsigned long dt_ptr)
void __init setup_arch(char **cmdline_p)
{
struct memblock_region *reg;
phys_addr_t start, end;
u64 i;
printk(KERN_INFO "Initializing kernel\n");
@@ -351,9 +352,9 @@ void __init setup_arch(char **cmdline_p)
disable_caching(ram_start, ram_end - 1);
/* Set caching of external RAM used by Linux */
for_each_memblock(memory, reg)
enable_caching(CACHE_REGION_START(reg->base),
CACHE_REGION_START(reg->base + reg->size - 1));
for_each_mem_range(i, &start, &end)
enable_caching(CACHE_REGION_START(start),
CACHE_REGION_START(end - 1));
#ifdef CONFIG_BLK_DEV_INITRD
/*


@@ -309,16 +309,3 @@ endmenu
source "arch/csky/Kconfig.platforms"
source "kernel/Kconfig.hz"
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.


@@ -74,17 +74,15 @@ static void __init bootmem_init(void)
memory_end = memory_start = 0;
/* Find main memory where is the kernel */
for_each_memblock(memory, region) {
memory_start = region->base;
memory_end = region->base + region->size;
}
memory_start = memblock_start_of_DRAM();
memory_end = memblock_end_of_DRAM();
if (!memory_end)
panic("No memory!");
/* setup bootmem globals (we use no_bootmem, but mm still depends on this) */
min_low_pfn = PFN_UP(memory_start);
max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
max_low_pfn = PFN_DOWN(memory_end);
max_pfn = max_low_pfn;
memblock_reserve(__pa(_stext), _end - _stext);


@@ -26,6 +26,7 @@ config MICROBLAZE
select GENERIC_SCHED_CLOCK
select HAVE_ARCH_HASH
select HAVE_ARCH_KGDB
select HAVE_ARCH_SECCOMP
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
@@ -120,23 +121,6 @@ config CMDLINE_FORCE
Set this to have arguments from the default kernel command string
override those passed by the boot loader.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
endmenu
menu "Kernel features"


@@ -108,15 +108,15 @@ static void __init paging_init(void)
void __init setup_memory(void)
{
struct memblock_region *reg;
#ifndef CONFIG_MMU
u32 kernel_align_start, kernel_align_size;
phys_addr_t start, end;
u64 i;
/* Find main memory where is the kernel */
for_each_memblock(memory, reg) {
memory_start = (u32)reg->base;
lowmem_size = reg->size;
for_each_mem_range(i, &start, &end) {
memory_start = start;
lowmem_size = end - start;
if ((memory_start <= (u32)_text) &&
((u32)_text <= (memory_start + lowmem_size - 1))) {
memory_size = lowmem_size;
@@ -164,17 +164,6 @@ void __init setup_memory(void)
pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn);
pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn);
/* Add active regions with valid PFNs */
for_each_memblock(memory, reg) {
unsigned long start_pfn, end_pfn;
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);
memblock_set_node(start_pfn << PAGE_SHIFT,
(end_pfn - start_pfn) << PAGE_SHIFT,
&memblock.memory, 0);
}
paging_init();
}


@@ -3006,23 +3006,6 @@ config PHYSICAL_START
specified in the "crashkernel=YM@XM" command line boot parameter
passed to the panic-ed kernel).
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
config MIPS_O32_FP64_SUPPORT
bool "Support for O32 binaries using 64-bit FP" if !CPU_MIPSR6
depends on 32BIT || MIPS32_O32


@@ -190,25 +190,25 @@ char *octeon_swiotlb;
void __init plat_swiotlb_setup(void)
{
struct memblock_region *mem;
phys_addr_t start, end;
phys_addr_t max_addr;
phys_addr_t addr_size;
size_t swiotlbsize;
unsigned long swiotlb_nslabs;
u64 i;
max_addr = 0;
addr_size = 0;
for_each_memblock(memory, mem) {
for_each_mem_range(i, &start, &end) {
/* These addresses map low for PCI. */
if (mem->base > 0x410000000ull && !OCTEON_IS_OCTEON2())
if (start > 0x410000000ull && !OCTEON_IS_OCTEON2())
continue;
addr_size += mem->size;
if (max_addr < mem->base + mem->size)
max_addr = mem->base + mem->size;
addr_size += (end - start);
if (max_addr < end)
max_addr = end;
}
swiotlbsize = PAGE_SIZE;


@@ -300,8 +300,9 @@ static void __init bootmem_init(void)
static void __init bootmem_init(void)
{
struct memblock_region *mem;
phys_addr_t ramstart, ramend;
phys_addr_t start, end;
u64 i;
ramstart = memblock_start_of_DRAM();
ramend = memblock_end_of_DRAM();
@@ -338,18 +339,13 @@ static void __init bootmem_init(void)
min_low_pfn = ARCH_PFN_OFFSET;
max_pfn = PFN_DOWN(ramend);
for_each_memblock(memory, mem) {
unsigned long start = memblock_region_memory_base_pfn(mem);
unsigned long end = memblock_region_memory_end_pfn(mem);
for_each_mem_range(i, &start, &end) {
/*
* Skip highmem here so we get an accurate max_low_pfn if low
* memory stops short of high memory.
* If the region overlaps HIGHMEM_START, end is clipped so
* max_pfn excludes the highmem portion.
*/
if (memblock_is_nomap(mem))
continue;
if (start >= PFN_DOWN(HIGHMEM_START))
continue;
if (end > PFN_DOWN(HIGHMEM_START))
@@ -450,13 +446,12 @@ early_param("memmap", early_parse_memmap);
unsigned long setup_elfcorehdr, setup_elfcorehdr_size;
static int __init early_parse_elfcorehdr(char *p)
{
struct memblock_region *mem;
phys_addr_t start, end;
u64 i;
setup_elfcorehdr = memparse(p, &p);
for_each_memblock(memory, mem) {
unsigned long start = mem->base;
unsigned long end = start + mem->size;
for_each_mem_range(i, &start, &end) {
if (setup_elfcorehdr >= start && setup_elfcorehdr < end) {
/*
* Reserve from the elf core header to the end of
@@ -720,7 +715,8 @@ static void __init arch_mem_init(char **cmdline_p)
static void __init resource_init(void)
{
struct memblock_region *region;
phys_addr_t start, end;
u64 i;
if (UNCAC_BASE != IO_BASE)
return;
@@ -732,9 +728,7 @@ static void __init resource_init(void)
bss_resource.start = __pa_symbol(&__bss_start);
bss_resource.end = __pa_symbol(&__bss_stop) - 1;
for_each_memblock(memory, region) {
phys_addr_t start = PFN_PHYS(memblock_region_memory_base_pfn(region));
phys_addr_t end = PFN_PHYS(memblock_region_memory_end_pfn(region)) - 1;
for_each_mem_range(i, &start, &end) {
struct resource *res;
res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
@@ -743,7 +737,12 @@ static void __init resource_init(void)
sizeof(struct resource));
res->start = start;
res->end = end;
/*
* In memblock, end points to the first byte after the
range while in resources, end points to the last byte in
* the range.
*/
res->end = end - 1;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
res->name = "System RAM";


@@ -70,7 +70,7 @@ static void nlm_fixup_mem(void)
const int pref_backup = 512;
struct memblock_region *mem;
for_each_memblock(memory, mem) {
for_each_mem_region(mem) {
memblock_remove(mem->base + mem->size - pref_backup,
pref_backup);
}


@@ -249,12 +249,8 @@ static void __init setup_memory(void)
memory_end = memory_start = 0;
/* Find main memory where is the kernel */
for_each_memblock(memory, region) {
memory_start = region->base;
memory_end = region->base + region->size;
pr_info("%s: Memory: 0x%x-0x%x\n", __func__,
memory_start, memory_end);
}
memory_start = memblock_start_of_DRAM();
memory_end = memblock_end_of_DRAM();
if (!memory_end) {
panic("No memory!");


@@ -48,17 +48,12 @@ static void __init setup_memory(void)
unsigned long ram_start_pfn;
unsigned long ram_end_pfn;
phys_addr_t memory_start, memory_end;
struct memblock_region *region;
memory_end = memory_start = 0;
/* Find main memory where is the kernel, we assume its the only one */
for_each_memblock(memory, region) {
memory_start = region->base;
memory_end = region->base + region->size;
printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__,
memory_start, memory_end);
}
memory_start = memblock_start_of_DRAM();
memory_end = memblock_end_of_DRAM();
if (!memory_end) {
panic("No memory!");


@@ -64,6 +64,7 @@ extern const char _s_kernel_ro[], _e_kernel_ro[];
*/
static void __init map_ram(void)
{
phys_addr_t start, end;
unsigned long v, p, e;
pgprot_t prot;
pgd_t *pge;
@@ -71,6 +72,7 @@ static void __init map_ram(void)
pud_t *pue;
pmd_t *pme;
pte_t *pte;
u64 i;
/* These mark extents of read-only kernel pages...
* ...from vmlinux.lds.S
*/
@@ -78,9 +80,9 @@ static void __init map_ram(void)
v = PAGE_OFFSET;
for_each_memblock(memory, region) {
p = (u32) region->base & PAGE_MASK;
e = p + (u32) region->size;
for_each_mem_range(i, &start, &end) {
p = (u32) start & PAGE_MASK;
e = (u32) end;
v = (u32) __va(p);
pge = pgd_offset_k(v);


@@ -378,19 +378,3 @@ endmenu
source "drivers/parisc/Kconfig"
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.


@@ -946,23 +946,6 @@ config ARCH_WANTS_FREEZER_CONTROL
source "kernel/power/Kconfig"
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
config PPC_MEM_KEYS
prompt "PowerPC Memory Protection Keys"
def_bool y


@@ -191,13 +191,13 @@ int is_fadump_active(void)
*/
static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
struct memblock_region *reg;
phys_addr_t reg_start, reg_end;
bool ret = false;
u64 start, end;
u64 i, start, end;
for_each_memblock(memory, reg) {
start = max_t(u64, d_start, reg->base);
end = min_t(u64, d_end, (reg->base + reg->size));
for_each_mem_range(i, &reg_start, &reg_end) {
start = max_t(u64, d_start, reg_start);
end = min_t(u64, d_end, reg_end);
if (d_start < end) {
/* Memory hole from d_start to start */
if (start > d_start)
@@ -422,34 +422,34 @@ static int __init add_boot_mem_regions(unsigned long mstart,
static int __init fadump_get_boot_mem_regions(void)
{
unsigned long base, size, cur_size, hole_size, last_end;
unsigned long size, cur_size, hole_size, last_end;
unsigned long mem_size = fw_dump.boot_memory_size;
struct memblock_region *reg;
phys_addr_t reg_start, reg_end;
int ret = 1;
u64 i;
fw_dump.boot_mem_regs_cnt = 0;
last_end = 0;
hole_size = 0;
cur_size = 0;
for_each_memblock(memory, reg) {
base = reg->base;
size = reg->size;
hole_size += (base - last_end);
for_each_mem_range(i, &reg_start, &reg_end) {
size = reg_end - reg_start;
hole_size += (reg_start - last_end);
if ((cur_size + size) >= mem_size) {
size = (mem_size - cur_size);
ret = add_boot_mem_regions(base, size);
ret = add_boot_mem_regions(reg_start, size);
break;
}
mem_size -= size;
cur_size += size;
ret = add_boot_mem_regions(base, size);
ret = add_boot_mem_regions(reg_start, size);
if (!ret)
break;
last_end = base + size;
last_end = reg_end;
}
fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
@@ -985,9 +985,8 @@ static int fadump_init_elfcore_header(char *bufp)
*/
static int fadump_setup_crash_memory_ranges(void)
{
struct memblock_region *reg;
u64 start, end;
int i, ret;
u64 i, start, end;
int ret;
pr_debug("Setup crash memory ranges.\n");
crash_mrange_info.mem_range_cnt = 0;
@@ -1005,10 +1004,7 @@ static int fadump_setup_crash_memory_ranges(void)
return ret;
}
for_each_memblock(memory, reg) {
start = (u64)reg->base;
end = start + (u64)reg->size;
for_each_mem_range(i, &start, &end) {
/*
* skip the memory chunk that is already added
* (0 through boot_memory_top).
@@ -1242,14 +1238,17 @@ static void fadump_free_reserved_memory(unsigned long start_pfn,
*/
static void fadump_release_reserved_area(u64 start, u64 end)
{
unsigned long reg_spfn, reg_epfn;
u64 tstart, tend, spfn, epfn;
struct memblock_region *reg;
int i;
spfn = PHYS_PFN(start);
epfn = PHYS_PFN(end);
for_each_memblock(memory, reg) {
tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
for_each_mem_pfn_range(i, MAX_NUMNODES, &reg_spfn, &reg_epfn, NULL) {
tstart = max_t(u64, spfn, reg_spfn);
tend = min_t(u64, epfn, reg_epfn);
if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend);
@@ -1684,12 +1683,10 @@ int __init fadump_reserve_mem(void)
/* Preserve everything above the base address */
static void __init fadump_reserve_crash_area(u64 base)
{
struct memblock_region *reg;
u64 mstart, msize;
u64 i, mstart, mend, msize;
for_each_memblock(memory, reg) {
mstart = reg->base;
msize = reg->size;
for_each_mem_range(i, &mstart, &mend) {
msize = mend - mstart;
if ((mstart + msize) < base)
continue;


@@ -138,15 +138,13 @@ out:
*/
static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
{
struct memblock_region *reg;
phys_addr_t base, end;
struct crash_mem *tmem;
u64 i;
int ret;
for_each_memblock(memory, reg) {
u64 base, size;
base = (u64)reg->base;
size = (u64)reg->size;
for_each_mem_range(i, &base, &end) {
u64 size = end - base;
/* Skip backup memory region, which needs a separate entry */
if (base == BACKUP_SRC_START) {
@@ -250,8 +248,7 @@ static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
phys_addr_t start, end;
u64 i;
for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE,
MEMBLOCK_NONE, &start, &end, NULL) {
for_each_mem_range_rev(i, &start, &end) {
/*
* memblock uses [start, end) convention while it is
* [start, end] here. Fix the off-by-one to have the
@@ -350,8 +347,7 @@ static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
phys_addr_t start, end;
u64 i;
for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
MEMBLOCK_NONE, &start, &end, NULL) {
for_each_mem_range(i, &start, &end) {
/*
* memblock uses [start, end) convention while it is
* [start, end] here. Fix the off-by-one to have the

View File

@@ -95,23 +95,15 @@ EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
void __init kvm_cma_reserve(void)
{
unsigned long align_size;
struct memblock_region *reg;
phys_addr_t selected_size = 0;
phys_addr_t selected_size;
/*
* We need CMA reservation only when we are in HV mode
*/
if (!cpu_has_feature(CPU_FTR_HVMODE))
return;
/*
* We cannot use memblock_phys_mem_size() here, because
* memblock_analyze() has not been called yet.
*/
for_each_memblock(memory, reg)
selected_size += memblock_region_memory_end_pfn(reg) -
memblock_region_memory_base_pfn(reg);
selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
selected_size = PAGE_ALIGN(memblock_phys_mem_size() * kvm_cma_resv_ratio / 100);
if (selected_size) {
pr_info("%s: reserving %ld MiB for global area\n", __func__,
(unsigned long)selected_size / SZ_1M);

View File

@@ -687,9 +687,9 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
struct kvmppc_uvmem_page_pvt *pvt;
unsigned long pfn_last, pfn_first;
pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT;
pfn_first = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT;
pfn_last = pfn_first +
(resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT);
(range_len(&kvmppc_uvmem_pgmap.range) >> PAGE_SHIFT);
spin_lock(&kvmppc_uvmem_bitmap_lock);
bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
@@ -1007,7 +1007,7 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
static void kvmppc_uvmem_page_free(struct page *page)
{
unsigned long pfn = page_to_pfn(page) -
(kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT);
(kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT);
struct kvmppc_uvmem_page_pvt *pvt;
spin_lock(&kvmppc_uvmem_bitmap_lock);
@@ -1170,7 +1170,9 @@ int kvmppc_uvmem_init(void)
}
kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
kvmppc_uvmem_pgmap.res = *res;
kvmppc_uvmem_pgmap.range.start = res->start;
kvmppc_uvmem_pgmap.range.end = res->end;
kvmppc_uvmem_pgmap.nr_range = 1;
kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
/* just one global instance: */
kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap;
@@ -1205,7 +1207,7 @@ void kvmppc_uvmem_free(void)
return;
memunmap_pages(&kvmppc_uvmem_pgmap);
release_mem_region(kvmppc_uvmem_pgmap.res.start,
resource_size(&kvmppc_uvmem_pgmap.res));
release_mem_region(kvmppc_uvmem_pgmap.range.start,
range_len(&kvmppc_uvmem_pgmap.range));
kfree(kvmppc_uvmem_bitmap);
}

View File

@@ -867,8 +867,8 @@ static void __init htab_initialize(void)
unsigned long table;
unsigned long pteg_count;
unsigned long prot;
unsigned long base = 0, size = 0;
struct memblock_region *reg;
phys_addr_t base = 0, size = 0, end;
u64 i;
DBG(" -> htab_initialize()\n");
@@ -960,9 +960,9 @@ static void __init htab_initialize(void)
#endif /* CONFIG_DEBUG_PAGEALLOC */
/* create bolted the linear mapping in the hash table */
for_each_memblock(memory, reg) {
base = (unsigned long)__va(reg->base);
size = reg->size;
for_each_mem_range(i, &base, &end) {
size = end - base;
base = (unsigned long)__va(base);
DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
base, size, prot);

View File

@@ -329,7 +329,8 @@ static int __meminit create_physical_mapping(unsigned long start,
static void __init radix_init_pgtable(void)
{
unsigned long rts_field;
struct memblock_region *reg;
phys_addr_t start, end;
u64 i;
/* We don't support slb for radix */
mmu_slb_size = 0;
@@ -337,20 +338,19 @@ static void __init radix_init_pgtable(void)
/*
* Create the linear mapping
*/
for_each_memblock(memory, reg) {
for_each_mem_range(i, &start, &end) {
/*
* The memblock allocator is up at this point, so the
* page tables will be allocated within the range. No
* need for a node (which we don't have yet).
*/
if ((reg->base + reg->size) >= RADIX_VMALLOC_START) {
if (end >= RADIX_VMALLOC_START) {
pr_warn("Outside the supported range\n");
continue;
}
WARN_ON(create_physical_mapping(reg->base,
reg->base + reg->size,
WARN_ON(create_physical_mapping(start, end,
radix_mem_block_size,
-1, PAGE_KERNEL));
}

View File

@@ -138,11 +138,11 @@ void __init kasan_mmu_init(void)
void __init kasan_init(void)
{
struct memblock_region *reg;
phys_addr_t base, end;
u64 i;
for_each_memblock(memory, reg) {
phys_addr_t base = reg->base;
phys_addr_t top = min(base + reg->size, total_lowmem);
for_each_mem_range(i, &base, &end) {
phys_addr_t top = min(end, total_lowmem);
int ret;
if (base >= top)

View File

@@ -184,15 +184,16 @@ void __init initmem_init(void)
/* mark pages that don't exist as nosave */
static int __init mark_nonram_nosave(void)
{
struct memblock_region *reg, *prev = NULL;
unsigned long spfn, epfn, prev = 0;
int i;
for_each_memblock(memory, reg) {
if (prev &&
memblock_region_memory_end_pfn(prev) < memblock_region_memory_base_pfn(reg))
register_nosave_region(memblock_region_memory_end_pfn(prev),
memblock_region_memory_base_pfn(reg));
prev = reg;
for_each_mem_pfn_range(i, MAX_NUMNODES, &spfn, &epfn, NULL) {
if (prev && prev < spfn)
register_nosave_region(prev, spfn);
prev = epfn;
}
return 0;
}
#else /* CONFIG_NEED_MULTIPLE_NODES */
@@ -584,20 +585,24 @@ void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
*/
static int __init add_system_ram_resources(void)
{
struct memblock_region *reg;
phys_addr_t start, end;
u64 i;
for_each_memblock(memory, reg) {
for_each_mem_range(i, &start, &end) {
struct resource *res;
unsigned long base = reg->base;
unsigned long size = reg->size;
res = kzalloc(sizeof(struct resource), GFP_KERNEL);
WARN_ON(!res);
if (res) {
res->name = "System RAM";
res->start = base;
res->end = base + size - 1;
res->start = start;
/*
* In memblock, end points to the first byte after
* the range while in resources, end points to the
* last byte in the range.
*/
res->end = end - 1;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
WARN_ON(request_resource(&iomem_resource, res) < 0);
}

View File

@@ -804,17 +804,14 @@ static void __init setup_nonnuma(void)
unsigned long total_ram = memblock_phys_mem_size();
unsigned long start_pfn, end_pfn;
unsigned int nid = 0;
struct memblock_region *reg;
int i;
printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
top_of_ram, total_ram);
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(top_of_ram - total_ram) >> 20);
for_each_memblock(memory, reg) {
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
fake_numa_create_new_node(end_pfn, &nid);
memblock_set_node(PFN_PHYS(start_pfn),
PFN_PHYS(end_pfn - start_pfn),

View File

@@ -123,11 +123,11 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
void __init mapin_ram(void)
{
struct memblock_region *reg;
phys_addr_t base, end;
u64 i;
for_each_memblock(memory, reg) {
phys_addr_t base = reg->base;
phys_addr_t top = min(base + reg->size, total_lowmem);
for_each_mem_range(i, &base, &end) {
phys_addr_t top = min(end, total_lowmem);
if (base >= top)
continue;

View File

@@ -334,19 +334,6 @@ menu "Kernel features"
source "kernel/Kconfig.hz"
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config RISCV_SBI_V01
bool "SBI v0.1 support"
default y
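The per-architecture SECCOMP help text deleted here (and again in the s390, sh, sparc, um, x86 and xtensa hunks further down, where the sparc change also gains a HAVE_ARCH_SECCOMP select) describes the strict-mode interface enabled through prctl(). A minimal user-space sketch of that interface, for reference only:

#include <linux/seccomp.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/* Strict mode: only read(), write(), _exit() and sigreturn()
	 * remain usable; any other syscall kills the task with SIGKILL. */
	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT) != 0)
		return 1;

	write(1, "sandboxed\n", 10);

	/* exit_group() is not on the allow list, so leave via plain exit(2) */
	syscall(SYS_exit, 0);
	return 0;	/* not reached */
}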

View File

@@ -145,21 +145,21 @@ static phys_addr_t dtb_early_pa __initdata;
void __init setup_bootmem(void)
{
struct memblock_region *reg;
phys_addr_t mem_size = 0;
phys_addr_t total_mem = 0;
phys_addr_t mem_start, end = 0;
phys_addr_t mem_start, start, end = 0;
phys_addr_t vmlinux_end = __pa_symbol(&_end);
phys_addr_t vmlinux_start = __pa_symbol(&_start);
u64 i;
/* Find the memory region containing the kernel */
for_each_memblock(memory, reg) {
end = reg->base + reg->size;
for_each_mem_range(i, &start, &end) {
phys_addr_t size = end - start;
if (!total_mem)
mem_start = reg->base;
if (reg->base <= vmlinux_start && vmlinux_end <= end)
BUG_ON(reg->size == 0);
total_mem = total_mem + reg->size;
mem_start = start;
if (start <= vmlinux_start && vmlinux_end <= end)
BUG_ON(size == 0);
total_mem = total_mem + size;
}
/*
@@ -191,15 +191,6 @@ void __init setup_bootmem(void)
early_init_fdt_scan_reserved_mem();
memblock_allow_resize();
memblock_dump_all();
for_each_memblock(memory, reg) {
unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
memblock_set_node(PFN_PHYS(start_pfn),
PFN_PHYS(end_pfn - start_pfn),
&memblock.memory, 0);
}
}
#ifdef CONFIG_MMU
@@ -464,7 +455,7 @@ static void __init setup_vm_final(void)
{
uintptr_t va, map_size;
phys_addr_t pa, start, end;
struct memblock_region *reg;
u64 i;
/* Set mmu_enabled flag */
mmu_enabled = true;
@@ -475,14 +466,9 @@ static void __init setup_vm_final(void)
PGDIR_SIZE, PAGE_TABLE);
/* Map all memory banks */
for_each_memblock(memory, reg) {
start = reg->base;
end = start + reg->size;
for_each_mem_range(i, &start, &end) {
if (start >= end)
break;
if (memblock_is_nomap(reg))
continue;
if (start <= __pa(PAGE_OFFSET) &&
__pa(PAGE_OFFSET) < end)
start = __pa(PAGE_OFFSET);
@@ -545,7 +531,7 @@ static void __init resource_init(void)
{
struct memblock_region *region;
for_each_memblock(memory, region) {
for_each_mem_region(region) {
struct resource *res;
res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);

View File

@@ -85,16 +85,16 @@ static void __init populate(void *start, void *end)
void __init kasan_init(void)
{
struct memblock_region *reg;
unsigned long i;
phys_addr_t _start, _end;
u64 i;
kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
(void *)kasan_mem_to_shadow((void *)
VMALLOC_END));
for_each_memblock(memory, reg) {
void *start = (void *)__va(reg->base);
void *end = (void *)__va(reg->base + reg->size);
for_each_mem_range(i, &_start, &_end) {
void *start = (void *)_start;
void *end = (void *)_end;
if (start >= end)
break;

View File

@@ -792,23 +792,6 @@ config CRASH_DUMP
endmenu
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y.
config CCW
def_bool y

View File

@@ -484,8 +484,9 @@ static struct resource __initdata *standard_resources[] = {
static void __init setup_resources(void)
{
struct resource *res, *std_res, *sub_res;
struct memblock_region *reg;
phys_addr_t start, end;
int j;
u64 i;
code_resource.start = (unsigned long) _text;
code_resource.end = (unsigned long) _etext - 1;
@@ -494,7 +495,7 @@ static void __init setup_resources(void)
bss_resource.start = (unsigned long) __bss_start;
bss_resource.end = (unsigned long) __bss_stop - 1;
for_each_memblock(memory, reg) {
for_each_mem_range(i, &start, &end) {
res = memblock_alloc(sizeof(*res), 8);
if (!res)
panic("%s: Failed to allocate %zu bytes align=0x%x\n",
@@ -502,8 +503,13 @@ static void __init setup_resources(void)
res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM";
res->start = reg->base;
res->end = reg->base + reg->size - 1;
res->start = start;
/*
* In memblock, end points to the first byte after the
* range while in resources, end points to the last byte in
* the range.
*/
res->end = end - 1;
request_resource(&iomem_resource, res);
for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
@@ -776,7 +782,7 @@ static void __init memblock_add_mem_detect_info(void)
unsigned long start, end;
int i;
memblock_dbg("physmem info source: %s (%hhd)\n",
pr_debug("physmem info source: %s (%hhd)\n",
get_mem_info_source(), mem_detect.info_source);
/* keep memblock lists close to the kernel */
memblock_set_bottom_up(true);
@@ -819,14 +825,15 @@ static void __init reserve_kernel(void)
static void __init setup_memory(void)
{
struct memblock_region *reg;
phys_addr_t start, end;
u64 i;
/*
* Init storage key for present memory
*/
for_each_memblock(memory, reg) {
storage_key_init_range(reg->base, reg->base + reg->size);
}
for_each_mem_range(i, &start, &end)
storage_key_init_range(start, end);
psw_set_key(PAGE_DEFAULT_KEY);
/* Only cosmetics */

View File

@@ -183,9 +183,9 @@ static void mark_kernel_pgd(void)
void __init cmma_init_nodat(void)
{
struct memblock_region *reg;
struct page *page;
unsigned long start, end, ix;
int i;
if (cmma_flag < 2)
return;
@@ -193,9 +193,7 @@ void __init cmma_init_nodat(void)
mark_kernel_pgd();
/* Set all kernel pages not used for page tables to stable/no-dat */
for_each_memblock(memory, reg) {
start = memblock_region_memory_base_pfn(reg);
end = memblock_region_memory_end_pfn(reg);
for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
page = pfn_to_page(start);
for (ix = start; ix < end; ix++, page++) {
if (__test_and_clear_bit(PG_arch_1, &page->flags))

View File

@@ -555,10 +555,11 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
*/
void __init vmem_map_init(void)
{
struct memblock_region *reg;
phys_addr_t base, end;
u64 i;
for_each_memblock(memory, reg)
vmem_add_range(reg->base, reg->size);
for_each_mem_range(i, &base, &end)
vmem_add_range(base, end - base);
__set_memory((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);

View File

@@ -600,22 +600,6 @@ config PHYSICAL_START
where the fail safe kernel needs to run at a different address
than the panic-ed kernel.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl, it cannot be disabled and the task is only
allowed to execute a few safe syscalls defined by each seccomp
mode.
If unsure, say N.
config SMP
bool "Symmetric multi-processing support"
depends on SYS_SUPPORTS_SMP

View File

@@ -226,15 +226,12 @@ void __init allocate_pgdat(unsigned int nid)
static void __init do_init_bootmem(void)
{
struct memblock_region *reg;
unsigned long start_pfn, end_pfn;
int i;
/* Add active regions with valid PFNs. */
for_each_memblock(memory, reg) {
unsigned long start_pfn, end_pfn;
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL)
__add_active_range(0, start_pfn, end_pfn);
}
/* All of system RAM sits in node 0 for the non-NUMA case */
allocate_pgdat(0);

View File

@@ -23,6 +23,7 @@ config SPARC
select HAVE_OPROFILE
select HAVE_ARCH_KGDB if !SMP || SPARC64
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_SECCOMP if SPARC64
select HAVE_EXIT_THREAD
select HAVE_PCI
select SYSCTL_EXCEPTION_TRACE
@@ -227,23 +228,6 @@ config EARLYFB
help
Say Y here to enable a faster early framebuffer boot console.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on SPARC64 && PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SPARC64 && SMP

View File

@@ -1192,18 +1192,14 @@ int of_node_to_nid(struct device_node *dp)
static void __init add_node_ranges(void)
{
struct memblock_region *reg;
phys_addr_t start, end;
unsigned long prev_max;
u64 i;
memblock_resized:
prev_max = memblock.memory.max;
for_each_memblock(memory, reg) {
unsigned long size = reg->size;
unsigned long start, end;
start = reg->base;
end = start + size;
for_each_mem_range(i, &start, &end) {
while (start < end) {
unsigned long this_end;
int nid;
@@ -1211,7 +1207,7 @@ memblock_resized:
this_end = memblock_nid_range(start, end, &nid);
numadbg("Setting memblock NUMA node nid[%d] "
"start[%lx] end[%lx]\n",
"start[%llx] end[%lx]\n",
nid, start, this_end);
memblock_set_node(start, this_end - start,

View File

@@ -173,22 +173,6 @@ config PGTABLE_LEVELS
default 3 if 3_LEVEL_PGTABLES
default 2
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y.
config UML_TIME_TRAVEL_SUPPORT
bool
prompt "Support time-travel mode (e.g. for test execution)"

View File

@@ -1970,22 +1970,6 @@ config EFI_MIXED
If unsure, say N.
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
source "kernel/Kconfig.hz"
config KEXEC

View File

@@ -5,15 +5,6 @@
#include "pgtable.h"
#include "../string.h"
/*
* __force_order is used by special_insns.h asm code to force instruction
* serialization.
*
* It is not referenced from the code, but GCC < 5 with -fPIE would fail
* due to an undefined symbol. Define it to make these ancient GCCs work.
*/
unsigned long __force_order;
#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */

View File

@@ -3,6 +3,7 @@
#define _ASM_X86_NUMA_H
#include <linux/nodemask.h>
#include <linux/errno.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
@@ -77,7 +78,12 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable);
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
void numa_emu_cmdline(char *);
int numa_emu_cmdline(char *str);
#else /* CONFIG_NUMA_EMU */
static inline int numa_emu_cmdline(char *str)
{
return -EINVAL;
}
#endif /* CONFIG_NUMA_EMU */
#endif /* _ASM_X86_NUMA_H */

View File

@@ -11,45 +11,47 @@
#include <linux/jump_label.h>
/*
* Volatile isn't enough to prevent the compiler from reordering the
* read/write functions for the control registers and messing everything up.
* A memory clobber would solve the problem, but would prevent reordering of
* all loads stores around it, which can hurt performance. Solution is to
* use a variable and mimic reads and writes to it to enforce serialization
* The compiler should not reorder volatile asm statements with respect to each
* other: they should execute in program order. However GCC 4.9.x and 5.x have
* a bug (which was fixed in 8.1, 7.3 and 6.5) where they might reorder
* volatile asm. The write functions are not affected since they have memory
* clobbers preventing reordering. To prevent reads from being reordered with
* respect to writes, use a dummy memory operand.
*/
extern unsigned long __force_order;
#define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL)
void native_write_cr0(unsigned long val);
static inline unsigned long native_read_cr0(void)
{
unsigned long val;
asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
asm volatile("mov %%cr0,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
static __always_inline unsigned long native_read_cr2(void)
{
unsigned long val;
asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
asm volatile("mov %%cr2,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
static __always_inline void native_write_cr2(unsigned long val)
{
asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
asm volatile("mov %0,%%cr2": : "r" (val) : "memory");
}
static inline unsigned long __native_read_cr3(void)
{
unsigned long val;
asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
static inline void native_write_cr3(unsigned long val)
{
asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
asm volatile("mov %0,%%cr3": : "r" (val) : "memory");
}
static inline unsigned long native_read_cr4(void)
@@ -64,10 +66,10 @@ static inline unsigned long native_read_cr4(void)
asm volatile("1: mov %%cr4, %0\n"
"2:\n"
_ASM_EXTABLE(1b, 2b)
: "=r" (val), "=m" (__force_order) : "0" (0));
: "=r" (val) : "0" (0), __FORCE_ORDER);
#else
/* CR4 always exists on x86_64. */
asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
asm volatile("mov %%cr4,%0\n\t" : "=r" (val) : __FORCE_ORDER);
#endif
return val;
}
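For orientation, the same dummy-operand idiom in a stand-alone, user-space x86 fragment (made-up helpers, not kernel code): the fake "m" input makes the read appear to depend on memory, so the compiler cannot hoist it above writes, which are already ordered by their "memory" clobbers.

/* Same idiom as __FORCE_ORDER, on an arbitrary never-dereferenced address. */
#define FAKE_ORDER "m"(*(unsigned int *)0x1000UL)

static inline unsigned long long read_tsc_ordered(void)
{
	unsigned int lo, hi;

	/* dummy memory input keeps the read from being reordered upward */
	asm volatile("rdtsc" : "=a" (lo), "=d" (hi) : FAKE_ORDER);
	return ((unsigned long long)hi << 32) | lo;
}

static inline void store_marker(unsigned long *p, unsigned long v)
{
	/* the "memory" clobber already serializes the write side */
	asm volatile("mov %1, %0" : "=m" (*p) : "r" (v) : "memory");
}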

View File

@@ -418,7 +418,7 @@ do { \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: mov %[efault],%[errout]\n" \
" xor"itype" %[output],%[output]\n" \
" xorl %k[output],%k[output]\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE_UA(1b, 3b) \

View File

@@ -360,7 +360,7 @@ void native_write_cr0(unsigned long val)
unsigned long bits_missing = 0;
set_register:
asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
@@ -379,7 +379,7 @@ void native_write_cr4(unsigned long val)
unsigned long bits_changed = 0;
set_register:
asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
asm volatile("mov %0,%%cr4": "+r" (val) : : "memory");
if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {

View File

@@ -305,6 +305,20 @@ static int __init cpcompare(const void *a, const void *b)
return (ap->addr != ap->entry->addr) - (bp->addr != bp->entry->addr);
}
static bool e820_nomerge(enum e820_type type)
{
/*
* These types may indicate distinct platform ranges aligned to
* numa node, protection domain, performance domain, or other
* boundaries. Do not merge them.
*/
if (type == E820_TYPE_PRAM)
return true;
if (type == E820_TYPE_SOFT_RESERVED)
return true;
return false;
}
int __init e820__update_table(struct e820_table *table)
{
struct e820_entry *entries = table->entries;
@@ -380,7 +394,7 @@ int __init e820__update_table(struct e820_table *table)
}
/* Continue building up new map based on this information: */
if (current_type != last_type || current_type == E820_TYPE_PRAM) {
if (current_type != last_type || e820_nomerge(current_type)) {
if (last_type != 0) {
new_entries[new_nr_entries].size = change_point[chg_idx]->addr - last_addr;
/* Move forward only if the new size was non-zero: */
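A worked illustration of the new behaviour (hypothetical addresses):

/*
 * Sorted input entries:
 *   [0x1000000000, 0x17ffffffff]  soft reserved
 *   [0x1800000000, 0x1fffffffff]  soft reserved
 *
 * Previously only E820_TYPE_PRAM was exempt from merging, so these two
 * contiguous same-type entries collapsed into a single 8 GiB entry.
 * With e820_nomerge() also covering E820_TYPE_SOFT_RESERVED, both
 * entries survive e820__update_table(), keeping the platform's
 * per-range (e.g. per target node) boundaries intact.
 */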

View File

@@ -264,16 +264,12 @@ static void __init relocate_initrd(void)
u64 area_size = PAGE_ALIGN(ramdisk_size);
/* We need to move the initrd down into directly mapped mem */
relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
area_size, PAGE_SIZE);
relocated_ramdisk = memblock_phys_alloc_range(area_size, PAGE_SIZE, 0,
PFN_PHYS(max_pfn_mapped));
if (!relocated_ramdisk)
panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size);
/* Note: this includes all the mem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
memblock_reserve(relocated_ramdisk, area_size);
initrd_start = relocated_ramdisk + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n",
@@ -300,13 +296,13 @@ static void __init early_reserve_initrd(void)
memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
}
static void __init reserve_initrd(void)
{
/* Assume only end is not page aligned */
u64 ramdisk_image = get_ramdisk_image();
u64 ramdisk_size = get_ramdisk_size();
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
u64 mapped_size;
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
@@ -314,12 +310,6 @@ static void __init reserve_initrd(void)
initrd_start = 0;
mapped_size = memblock_mem_size(max_pfn_mapped);
if (ramdisk_size >= (mapped_size>>1))
panic("initrd too large to handle, "
"disabling initrd (%lld needed, %lld available)\n",
ramdisk_size, mapped_size>>1);
printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
ramdisk_end - 1);
@@ -431,13 +421,13 @@ static int __init reserve_crashkernel_low(void)
{
#ifdef CONFIG_X86_64
unsigned long long base, low_base = 0, low_size = 0;
unsigned long total_low_mem;
unsigned long low_mem_limit;
int ret;
total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX);
/* crashkernel=Y,low */
ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base);
ret = parse_crashkernel_low(boot_command_line, low_mem_limit, &low_size, &base);
if (ret) {
/*
* two parts from kernel/dma/swiotlb.c:
@@ -455,23 +445,17 @@ static int __init reserve_crashkernel_low(void)
return 0;
}
low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN);
low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
if (!low_base) {
pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
(unsigned long)(low_size >> 20));
return -ENOMEM;
}
ret = memblock_reserve(low_base, low_size);
if (ret) {
pr_err("%s: Error reserving crashkernel low memblock.\n", __func__);
return ret;
}
pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low RAM limit: %ldMB)\n",
(unsigned long)(low_size >> 20),
(unsigned long)(low_base >> 20),
(unsigned long)(total_low_mem >> 20));
(unsigned long)(low_mem_limit >> 20));
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
@@ -515,13 +499,13 @@ static void __init reserve_crashkernel(void)
* unless "crashkernel=size[KMG],high" is specified.
*/
if (!high)
crash_base = memblock_find_in_range(CRASH_ALIGN,
CRASH_ADDR_LOW_MAX,
crash_size, CRASH_ALIGN);
crash_base = memblock_phys_alloc_range(crash_size,
CRASH_ALIGN, CRASH_ALIGN,
CRASH_ADDR_LOW_MAX);
if (!crash_base)
crash_base = memblock_find_in_range(CRASH_ALIGN,
CRASH_ADDR_HIGH_MAX,
crash_size, CRASH_ALIGN);
crash_base = memblock_phys_alloc_range(crash_size,
CRASH_ALIGN, CRASH_ALIGN,
CRASH_ADDR_HIGH_MAX);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
@@ -529,19 +513,13 @@ static void __init reserve_crashkernel(void)
} else {
unsigned long long start;
start = memblock_find_in_range(crash_base,
crash_base + crash_size,
crash_size, 1 << 20);
start = memblock_phys_alloc_range(crash_size, SZ_1M, crash_base,
crash_base + crash_size);
if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in use.\n");
return;
}
}
ret = memblock_reserve(crash_base, crash_size);
if (ret) {
pr_err("%s: Error reserving crashkernel memblock.\n", __func__);
return;
}
if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
memblock_free(crash_base, crash_size);
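The initrd and crashkernel hunks above all make the same substitution: a memblock_find_in_range() search followed by a separate memblock_reserve() becomes one memblock_phys_alloc_range() call, which finds and reserves in a single step and returns 0 on failure. A minimal sketch of the new pattern (hypothetical size and limits):

#include <linux/memblock.h>
#include <linux/printk.h>
#include <linux/sizes.h>

/* Hypothetical example: reserve 64 MiB, 16 MiB aligned, below 4 GiB. */
static phys_addr_t __init reserve_example_region(void)
{
	phys_addr_t base;

	base = memblock_phys_alloc_range(SZ_64M, SZ_16M, 0,
					 0x100000000ULL /* 4 GiB */);
	if (!base)
		pr_warn("example: no suitable 64 MiB range below 4 GiB\n");

	return base;	/* already reserved, no memblock_reserve() needed */
}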

View File

@@ -37,14 +37,12 @@ static __init int numa_setup(char *opt)
return -EINVAL;
if (!strncmp(opt, "off", 3))
numa_off = 1;
#ifdef CONFIG_NUMA_EMU
if (!strncmp(opt, "fake=", 5))
numa_emu_cmdline(opt + 5);
#endif
#ifdef CONFIG_ACPI_NUMA
return numa_emu_cmdline(opt + 5);
if (!strncmp(opt, "noacpi", 6))
acpi_numa = -1;
#endif
disable_srat();
if (!strncmp(opt, "nohmat", 6))
disable_hmat();
return 0;
}
early_param("numa", numa_setup);
@@ -516,7 +514,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
* memory ranges, because quirks such as trim_snb_memory()
* reserve specific pages for Sandy Bridge graphics. ]
*/
for_each_memblock(reserved, mb_region) {
for_each_reserved_mem_region(mb_region) {
int nid = memblock_get_region_node(mb_region);
if (nid != MAX_NUMNODES)
@@ -919,7 +917,6 @@ int phys_to_target_node(phys_addr_t start)
return meminfo_to_nid(&numa_reserved_meminfo, start);
}
EXPORT_SYMBOL_GPL(phys_to_target_node);
int memory_add_physaddr_to_nid(u64 start)
{

View File

@@ -13,9 +13,10 @@
static int emu_nid_to_phys[MAX_NUMNODES];
static char *emu_cmdline __initdata;
void __init numa_emu_cmdline(char *str)
int __init numa_emu_cmdline(char *str)
{
emu_cmdline = str;
return 0;
}
static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)

View File

@@ -1300,7 +1300,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
* any NUMA information the kernel tries to get from ACPI will
* be meaningless. Prevent it from trying.
*/
acpi_numa = -1;
disable_srat();
#endif
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));

View File

@@ -217,20 +217,6 @@ config HOTPLUG_CPU
Say N if you want to disable CPU hotplug.
config SECCOMP
bool
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config FAST_SYSCALL_XTENSA
bool "Enable fast atomic syscalls"
default n

View File

@@ -79,67 +79,32 @@ void __init zones_init(void)
free_area_init(max_zone_pfn);
}
#ifdef CONFIG_HIGHMEM
static void __init free_area_high(unsigned long pfn, unsigned long end)
{
for (; pfn < end; pfn++)
free_highmem_page(pfn_to_page(pfn));
}
static void __init free_highpages(void)
{
#ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn;
struct memblock_region *mem, *res;
phys_addr_t range_start, range_end;
u64 i;
reset_all_zones_managed_pages();
/* set highmem page free */
for_each_memblock(memory, mem) {
unsigned long start = memblock_region_memory_base_pfn(mem);
unsigned long end = memblock_region_memory_end_pfn(mem);
for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
&range_start, &range_end, NULL) {
unsigned long start = PHYS_PFN(range_start);
unsigned long end = PHYS_PFN(range_end);
/* Ignore complete lowmem entries */
if (end <= max_low)
continue;
if (memblock_is_nomap(mem))
continue;
/* Truncate partial highmem entries */
if (start < max_low)
start = max_low;
/* Find and exclude any reserved regions */
for_each_memblock(reserved, res) {
unsigned long res_start, res_end;
res_start = memblock_region_reserved_base_pfn(res);
res_end = memblock_region_reserved_end_pfn(res);
if (res_end < start)
continue;
if (res_start < start)
res_start = start;
if (res_start > end)
res_start = end;
if (res_end > end)
res_end = end;
if (res_start != start)
free_area_high(start, res_start);
start = res_end;
if (start == end)
break;
for (; start < end; start++)
free_highmem_page(pfn_to_page(start));
}
/* And now free anything which remains */
if (start < end)
free_area_high(start, end);
}
}
#else
static void __init free_highpages(void)
{
}
#endif
}
/*
* Initialize memory pages.

View File

@@ -24,8 +24,15 @@
#include <linux/mutex.h>
#include <linux/node.h>
#include <linux/sysfs.h>
#include <linux/dax.h>
static u8 hmat_revision;
static int hmat_disable __initdata;
void __init disable_hmat(void)
{
hmat_disable = 1;
}
static LIST_HEAD(targets);
static LIST_HEAD(initiators);
@@ -634,66 +641,6 @@ static void hmat_register_target_perf(struct memory_target *target)
node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0);
}
static void hmat_register_target_device(struct memory_target *target,
struct resource *r)
{
/* define a clean / non-busy resource for the platform device */
struct resource res = {
.start = r->start,
.end = r->end,
.flags = IORESOURCE_MEM,
};
struct platform_device *pdev;
struct memregion_info info;
int rc, id;
rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM,
IORES_DESC_SOFT_RESERVED);
if (rc != REGION_INTERSECTS)
return;
id = memregion_alloc(GFP_KERNEL);
if (id < 0) {
pr_err("memregion allocation failure for %pr\n", &res);
return;
}
pdev = platform_device_alloc("hmem", id);
if (!pdev) {
pr_err("hmem device allocation failure for %pr\n", &res);
goto out_pdev;
}
pdev->dev.numa_node = acpi_map_pxm_to_online_node(target->memory_pxm);
info = (struct memregion_info) {
.target_node = acpi_map_pxm_to_node(target->memory_pxm),
};
rc = platform_device_add_data(pdev, &info, sizeof(info));
if (rc < 0) {
pr_err("hmem memregion_info allocation failure for %pr\n", &res);
goto out_pdev;
}
rc = platform_device_add_resources(pdev, &res, 1);
if (rc < 0) {
pr_err("hmem resource allocation failure for %pr\n", &res);
goto out_resource;
}
rc = platform_device_add(pdev);
if (rc < 0) {
dev_err(&pdev->dev, "device add failed for %pr\n", &res);
goto out_resource;
}
return;
out_resource:
put_device(&pdev->dev);
out_pdev:
memregion_free(id);
}
static void hmat_register_target_devices(struct memory_target *target)
{
struct resource *res;
@@ -705,8 +652,11 @@ static void hmat_register_target_devices(struct memory_target *target)
if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
return;
for (res = target->memregions.child; res; res = res->sibling)
hmat_register_target_device(target, res);
for (res = target->memregions.child; res; res = res->sibling) {
int target_nid = acpi_map_pxm_to_node(target->memory_pxm);
hmem_register_device(target_nid, res);
}
}
static void hmat_register_target(struct memory_target *target)
@@ -814,7 +764,7 @@ static __init int hmat_init(void)
enum acpi_hmat_type i;
acpi_status status;
if (srat_disabled())
if (srat_disabled() || hmat_disable)
return 0;
status = acpi_get_table(ACPI_SIG_SRAT, 0, &tbl);

View File

@@ -27,7 +27,12 @@ static int node_to_pxm_map[MAX_NUMNODES]
= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
unsigned char acpi_srat_revision __initdata;
int acpi_numa __initdata;
static int acpi_numa __initdata;
void __init disable_srat(void)
{
acpi_numa = -1;
}
int pxm_to_node(int pxm)
{
@@ -163,7 +168,7 @@ static int __init slit_valid(struct acpi_table_slit *slit)
void __init bad_srat(void)
{
pr_err("SRAT: SRAT not used.\n");
acpi_numa = -1;
disable_srat();
}
int __init srat_disabled(void)

View File

@@ -3324,7 +3324,7 @@ struct device *device_find_child_by_name(struct device *parent,
klist_iter_init(&parent->p->klist_children, &i);
while ((child = next_device(&i)))
if (!strcmp(dev_name(child), name) && get_device(child))
if (sysfs_streq(dev_name(child), name) && get_device(child))
break;
klist_iter_exit(&i);
return child;
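device_find_child_by_name() now compares with sysfs_streq(), which treats a trailing newline in either string as the terminator, so names written through sysfs match without manual trimming. A tiny illustration (hypothetical device name):

#include <linux/compiler.h>
#include <linux/string.h>
#include <linux/types.h>

static bool __maybe_unused name_matches(const char *input)
{
	/* true for both "ttyS0" and "ttyS0\n"; strcmp() would reject the latter */
	return sysfs_streq(input, "ttyS0");
}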
@@ -4061,22 +4061,21 @@ void device_shutdown(void)
*/
#ifdef CONFIG_PRINTK
static int
create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen)
static void
set_dev_info(const struct device *dev, struct dev_printk_info *dev_info)
{
const char *subsys;
size_t pos = 0;
memset(dev_info, 0, sizeof(*dev_info));
if (dev->class)
subsys = dev->class->name;
else if (dev->bus)
subsys = dev->bus->name;
else
return 0;
return;
pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys);
if (pos >= hdrlen)
goto overflow;
strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem));
/*
* Add device identifier DEVICE=:
@@ -4092,41 +4091,28 @@ create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen)
c = 'b';
else
c = 'c';
pos++;
pos += snprintf(hdr + pos, hdrlen - pos,
"DEVICE=%c%u:%u",
c, MAJOR(dev->devt), MINOR(dev->devt));
snprintf(dev_info->device, sizeof(dev_info->device),
"%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt));
} else if (strcmp(subsys, "net") == 0) {
struct net_device *net = to_net_dev(dev);
pos++;
pos += snprintf(hdr + pos, hdrlen - pos,
"DEVICE=n%u", net->ifindex);
snprintf(dev_info->device, sizeof(dev_info->device),
"n%u", net->ifindex);
} else {
pos++;
pos += snprintf(hdr + pos, hdrlen - pos,
"DEVICE=+%s:%s", subsys, dev_name(dev));
snprintf(dev_info->device, sizeof(dev_info->device),
"+%s:%s", subsys, dev_name(dev));
}
if (pos >= hdrlen)
goto overflow;
return pos;
overflow:
dev_WARN(dev, "device/subsystem name too long");
return 0;
}
int dev_vprintk_emit(int level, const struct device *dev,
const char *fmt, va_list args)
{
char hdr[128];
size_t hdrlen;
struct dev_printk_info dev_info;
hdrlen = create_syslog_header(dev, hdr, sizeof(hdr));
set_dev_info(dev, &dev_info);
return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen, fmt, args);
return vprintk_emit(0, level, &dev_info, fmt, args);
}
EXPORT_SYMBOL(dev_vprintk_emit);

View File

@@ -610,23 +610,23 @@ static unsigned int armada_xp_mbus_win_remap_offset(int win)
static void __init
mvebu_mbus_find_bridge_hole(uint64_t *start, uint64_t *end)
{
struct memblock_region *r;
uint64_t s = 0;
phys_addr_t reg_start, reg_end;
uint64_t i, s = 0;
for_each_memblock(memory, r) {
for_each_mem_range(i, &reg_start, &reg_end) {
/*
* This part of the memory is above 4 GB, so we don't
* care for the MBus bridge hole.
*/
if (r->base >= 0x100000000ULL)
if (reg_start >= 0x100000000ULL)
continue;
/*
* The MBus bridge hole is at the end of the RAM under
* the 4 GB limit.
*/
if (r->base + r->size > s)
s = r->base + r->size;
if (reg_end > s)
s = reg_end;
}
*start = s;

View File

@@ -35,6 +35,7 @@ config DEV_DAX_PMEM
config DEV_DAX_HMEM
tristate "HMEM DAX: direct access to 'specific purpose' memory"
depends on EFI_SOFT_RESERVE
select NUMA_KEEP_MEMINFO if (NUMA && X86)
default DEV_DAX
help
EFI 2.8 platforms, and others, may advertise 'specific purpose'
@@ -48,6 +49,11 @@ config DEV_DAX_HMEM
Say M if unsure.
config DEV_DAX_HMEM_DEVICES
depends on NUMA_KEEP_MEMINFO # for phys_to_target_node()
depends on DEV_DAX_HMEM && DAX=y
def_bool y
config DEV_DAX_KMEM
tristate "KMEM DAX: volatile-use of persistent memory"
default DEV_DAX

View File

@@ -2,11 +2,10 @@
obj-$(CONFIG_DAX) += dax.o
obj-$(CONFIG_DEV_DAX) += device_dax.o
obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o
dax-y := super.o
dax-y += bus.o
device_dax-y := device.o
dax_hmem-y := hmem.o
obj-y += pmem/
obj-y += hmem/

File diff suppressed because it is too large

View File

@@ -3,29 +3,33 @@
#ifndef __DAX_BUS_H__
#define __DAX_BUS_H__
#include <linux/device.h>
#include <linux/range.h>
struct dev_dax;
struct resource;
struct dax_device;
struct dax_region;
void dax_region_put(struct dax_region *dax_region);
#define IORESOURCE_DAX_STATIC (1UL << 0)
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct resource *res, int target_node, unsigned int align,
unsigned long long flags);
struct range *range, int target_node, unsigned int align,
unsigned long flags);
enum dev_dax_subsys {
DEV_DAX_BUS,
DEV_DAX_BUS = 0, /* zeroed dev_dax_data picks this by default */
DEV_DAX_CLASS,
};
struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
struct dev_pagemap *pgmap, enum dev_dax_subsys subsys);
struct dev_dax_data {
struct dax_region *dax_region;
struct dev_pagemap *pgmap;
enum dev_dax_subsys subsys;
resource_size_t size;
int id;
};
static inline struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
int id, struct dev_pagemap *pgmap)
{
return __devm_create_dev_dax(dax_region, id, pgmap, DEV_DAX_BUS);
}
struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
/* to be deleted when DEV_DAX_CLASS is removed */
struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys);
@@ -34,6 +38,8 @@ struct dax_device_driver {
struct device_driver drv;
struct list_head ids;
int match_always;
int (*probe)(struct dev_dax *dev);
int (*remove)(struct dev_dax *dev);
};
int __dax_driver_register(struct dax_device_driver *dax_drv,
@@ -44,7 +50,7 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv);
void kill_dev_dax(struct dev_dax *dev_dax);
#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
int dev_dax_probe(struct device *dev);
int dev_dax_probe(struct dev_dax *dev_dax);
#endif
/*

View File

@@ -7,6 +7,7 @@
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/idr.h>
/* private routines between core files */
struct dax_device;
@@ -22,8 +23,10 @@ void dax_bus_exit(void);
* @kref: to pin while other agents have a need to do lookups
* @dev: parent device backing this region
* @align: allocation and mapping alignment for child dax devices
* @res: physical address range of the region
* @pfn_flags: identify whether the pfns are paged back or not
* @ida: instance id allocator
* @res: resource tree to track instance allocations
* @seed: allow userspace to find the first unbound seed device
* @youngest: allow userspace to find the most recently created device
*/
struct dax_region {
int id;
@@ -31,8 +34,16 @@ struct dax_region {
struct kref kref;
struct device *dev;
unsigned int align;
struct ida ida;
struct resource res;
unsigned long long pfn_flags;
struct device *seed;
struct device *youngest;
};
struct dax_mapping {
struct device dev;
int range_id;
int id;
};
/**
@@ -41,22 +52,57 @@ struct dax_region {
* @region - parent region
* @dax_dev - core dax functionality
* @target_node: effective numa node if dev_dax memory range is onlined
* @id: ida allocated id
* @ida: mapping id allocator
* @dev - device core
* @pgmap - pgmap for memmap setup / lifetime (driver owned)
* @dax_mem_res: physical address range of hotadded DAX memory
* @dax_mem_name: name for hotadded DAX memory via add_memory_driver_managed()
* @nr_range: size of @ranges
* @ranges: resource-span + pgoff tuples for the instance
*/
struct dev_dax {
struct dax_region *region;
struct dax_device *dax_dev;
unsigned int align;
int target_node;
int id;
struct ida ida;
struct device dev;
struct dev_pagemap pgmap;
struct resource *dax_kmem_res;
struct dev_pagemap *pgmap;
int nr_range;
struct dev_dax_range {
unsigned long pgoff;
struct range range;
struct dax_mapping *mapping;
} *ranges;
};
static inline struct dev_dax *to_dev_dax(struct device *dev)
{
return container_of(dev, struct dev_dax, dev);
}
static inline struct dax_mapping *to_dax_mapping(struct device *dev)
{
return container_of(dev, struct dax_mapping, dev);
}
phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline bool dax_align_valid(unsigned long align)
{
if (align == PUD_SIZE && IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
return true;
if (align == PMD_SIZE && has_transparent_hugepage())
return true;
if (align == PAGE_SIZE)
return true;
return false;
}
#else
static inline bool dax_align_valid(unsigned long align)
{
return align == PAGE_SIZE;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

View File

@@ -17,7 +17,6 @@
static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
const char *func)
{
struct dax_region *dax_region = dev_dax->region;
struct device *dev = &dev_dax->dev;
unsigned long mask;
@@ -32,7 +31,7 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
return -EINVAL;
}
mask = dax_region->align - 1;
mask = dev_dax->align - 1;
if (vma->vm_start & mask || vma->vm_end & mask) {
dev_info_ratelimited(dev,
"%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
@@ -41,14 +40,6 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
return -EINVAL;
}
if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV
&& (vma->vm_flags & VM_DONTCOPY) == 0) {
dev_info_ratelimited(dev,
"%s: %s: fail, dax range requires MADV_DONTFORK\n",
current->comm, func);
return -EINVAL;
}
if (!vma_is_dax(vma)) {
dev_info_ratelimited(dev,
"%s: %s: fail, vma is not DAX capable\n",
@@ -63,15 +54,22 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
__weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
unsigned long size)
{
struct resource *res = &dev_dax->region->res;
int i;
for (i = 0; i < dev_dax->nr_range; i++) {
struct dev_dax_range *dax_range = &dev_dax->ranges[i];
struct range *range = &dax_range->range;
unsigned long long pgoff_end;
phys_addr_t phys;
phys = pgoff * PAGE_SIZE + res->start;
if (phys >= res->start && phys <= res->end) {
if (phys + size - 1 <= res->end)
pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1;
if (pgoff < dax_range->pgoff || pgoff > pgoff_end)
continue;
phys = PFN_PHYS(pgoff - dax_range->pgoff) + range->start;
if (phys + size - 1 <= range->end)
return phys;
break;
}
return -1;
}
@@ -79,21 +77,19 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
struct vm_fault *vmf, pfn_t *pfn)
{
struct device *dev = &dev_dax->dev;
struct dax_region *dax_region;
phys_addr_t phys;
unsigned int fault_size = PAGE_SIZE;
if (check_vma(dev_dax, vmf->vma, __func__))
return VM_FAULT_SIGBUS;
dax_region = dev_dax->region;
if (dax_region->align > PAGE_SIZE) {
if (dev_dax->align > PAGE_SIZE) {
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
dax_region->align, fault_size);
dev_dax->align, fault_size);
return VM_FAULT_SIGBUS;
}
if (fault_size != dax_region->align)
if (fault_size != dev_dax->align)
return VM_FAULT_SIGBUS;
phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
@@ -102,7 +98,7 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
}
@@ -112,7 +108,6 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
{
unsigned long pmd_addr = vmf->address & PMD_MASK;
struct device *dev = &dev_dax->dev;
struct dax_region *dax_region;
phys_addr_t phys;
pgoff_t pgoff;
unsigned int fault_size = PMD_SIZE;
@@ -120,22 +115,15 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
if (check_vma(dev_dax, vmf->vma, __func__))
return VM_FAULT_SIGBUS;
dax_region = dev_dax->region;
if (dax_region->align > PMD_SIZE) {
if (dev_dax->align > PMD_SIZE) {
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
dax_region->align, fault_size);
dev_dax->align, fault_size);
return VM_FAULT_SIGBUS;
}
/* dax pmd mappings require pfn_t_devmap() */
if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
dev_dbg(dev, "region lacks devmap flags\n");
if (fault_size < dev_dax->align)
return VM_FAULT_SIGBUS;
}
if (fault_size < dax_region->align)
return VM_FAULT_SIGBUS;
else if (fault_size > dax_region->align)
else if (fault_size > dev_dax->align)
return VM_FAULT_FALLBACK;
/* if we are outside of the VMA */
@@ -150,7 +138,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
}
@@ -161,7 +149,6 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
{
unsigned long pud_addr = vmf->address & PUD_MASK;
struct device *dev = &dev_dax->dev;
struct dax_region *dax_region;
phys_addr_t phys;
pgoff_t pgoff;
unsigned int fault_size = PUD_SIZE;
@@ -170,22 +157,15 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
if (check_vma(dev_dax, vmf->vma, __func__))
return VM_FAULT_SIGBUS;
dax_region = dev_dax->region;
if (dax_region->align > PUD_SIZE) {
if (dev_dax->align > PUD_SIZE) {
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
dax_region->align, fault_size);
dev_dax->align, fault_size);
return VM_FAULT_SIGBUS;
}
/* dax pud mappings require pfn_t_devmap() */
if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
dev_dbg(dev, "region lacks devmap flags\n");
if (fault_size < dev_dax->align)
return VM_FAULT_SIGBUS;
}
if (fault_size < dax_region->align)
return VM_FAULT_SIGBUS;
else if (fault_size > dax_region->align)
else if (fault_size > dev_dax->align)
return VM_FAULT_FALLBACK;
/* if we are outside of the VMA */
@@ -200,7 +180,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
}
@@ -280,9 +260,8 @@ static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr)
{
struct file *filp = vma->vm_file;
struct dev_dax *dev_dax = filp->private_data;
struct dax_region *dax_region = dev_dax->region;
if (!IS_ALIGNED(addr, dax_region->align))
if (!IS_ALIGNED(addr, dev_dax->align))
return -EINVAL;
return 0;
}
@@ -291,9 +270,8 @@ static unsigned long dev_dax_pagesize(struct vm_area_struct *vma)
{
struct file *filp = vma->vm_file;
struct dev_dax *dev_dax = filp->private_data;
struct dax_region *dax_region = dev_dax->region;
return dax_region->align;
return dev_dax->align;
}
static const struct vm_operations_struct dax_vm_ops = {
@@ -332,13 +310,11 @@ static unsigned long dax_get_unmapped_area(struct file *filp,
{
unsigned long off, off_end, off_align, len_align, addr_align, align;
struct dev_dax *dev_dax = filp ? filp->private_data : NULL;
struct dax_region *dax_region;
if (!dev_dax || addr)
goto out;
dax_region = dev_dax->region;
align = dax_region->align;
align = dev_dax->align;
off = pgoff << PAGE_SHIFT;
off_end = off + len;
off_align = round_up(off, align);
@@ -412,25 +388,45 @@ static void dev_dax_kill(void *dev_dax)
kill_dev_dax(dev_dax);
}
int dev_dax_probe(struct device *dev)
int dev_dax_probe(struct dev_dax *dev_dax)
{
struct dev_dax *dev_dax = to_dev_dax(dev);
struct dax_device *dax_dev = dev_dax->dax_dev;
struct resource *res = &dev_dax->region->res;
struct device *dev = &dev_dax->dev;
struct dev_pagemap *pgmap;
struct inode *inode;
struct cdev *cdev;
void *addr;
int rc;
int rc, i;
/* 1:1 map region resource range to device-dax instance range */
if (!devm_request_mem_region(dev, res->start, resource_size(res),
dev_name(dev))) {
dev_warn(dev, "could not reserve region %pR\n", res);
return -EBUSY;
pgmap = dev_dax->pgmap;
if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
"static pgmap / multi-range device conflict\n"))
return -EINVAL;
if (!pgmap) {
pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
* (dev_dax->nr_range - 1), GFP_KERNEL);
if (!pgmap)
return -ENOMEM;
pgmap->nr_range = dev_dax->nr_range;
}
dev_dax->pgmap.type = MEMORY_DEVICE_GENERIC;
addr = devm_memremap_pages(dev, &dev_dax->pgmap);
for (i = 0; i < dev_dax->nr_range; i++) {
struct range *range = &dev_dax->ranges[i].range;
if (!devm_request_mem_region(dev, range->start,
range_len(range), dev_name(dev))) {
dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n",
i, range->start, range->end);
return -EBUSY;
}
/* don't update the range for static pgmap */
if (!dev_dax->pgmap)
pgmap->ranges[i] = *range;
}
pgmap->type = MEMORY_DEVICE_GENERIC;
addr = devm_memremap_pages(dev, pgmap);
if (IS_ERR(addr))
return PTR_ERR(addr);
@@ -456,17 +452,15 @@ int dev_dax_probe(struct device *dev)
}
EXPORT_SYMBOL_GPL(dev_dax_probe);
static int dev_dax_remove(struct device *dev)
static int dev_dax_remove(struct dev_dax *dev_dax)
{
/* all probe actions are unwound by devm */
return 0;
}
static struct dax_device_driver device_dax_driver = {
.drv = {
.probe = dev_dax_probe,
.remove = dev_dax_remove,
},
.match_always = 1,
};

View File

@@ -0,0 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o
obj-$(CONFIG_DEV_DAX_HMEM_DEVICES) += device_hmem.o
device_hmem-y := device.o
dax_hmem-y := hmem.o

drivers/dax/hmem/device.c (new file, 100 lines)
View File

@@ -0,0 +1,100 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/platform_device.h>
#include <linux/memregion.h>
#include <linux/module.h>
#include <linux/dax.h>
#include <linux/mm.h>
static bool nohmem;
module_param_named(disable, nohmem, bool, 0444);
void hmem_register_device(int target_nid, struct resource *r)
{
/* define a clean / non-busy resource for the platform device */
struct resource res = {
.start = r->start,
.end = r->end,
.flags = IORESOURCE_MEM,
};
struct platform_device *pdev;
struct memregion_info info;
int rc, id;
if (nohmem)
return;
rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM,
IORES_DESC_SOFT_RESERVED);
if (rc != REGION_INTERSECTS)
return;
id = memregion_alloc(GFP_KERNEL);
if (id < 0) {
pr_err("memregion allocation failure for %pr\n", &res);
return;
}
pdev = platform_device_alloc("hmem", id);
if (!pdev) {
pr_err("hmem device allocation failure for %pr\n", &res);
goto out_pdev;
}
pdev->dev.numa_node = numa_map_to_online_node(target_nid);
info = (struct memregion_info) {
.target_node = target_nid,
};
rc = platform_device_add_data(pdev, &info, sizeof(info));
if (rc < 0) {
pr_err("hmem memregion_info allocation failure for %pr\n", &res);
goto out_pdev;
}
rc = platform_device_add_resources(pdev, &res, 1);
if (rc < 0) {
pr_err("hmem resource allocation failure for %pr\n", &res);
goto out_resource;
}
rc = platform_device_add(pdev);
if (rc < 0) {
dev_err(&pdev->dev, "device add failed for %pr\n", &res);
goto out_resource;
}
return;
out_resource:
put_device(&pdev->dev);
out_pdev:
memregion_free(id);
}
static __init int hmem_register_one(struct resource *res, void *data)
{
/*
* If the resource is not a top-level resource it was already
* assigned to a device by the HMAT parsing.
*/
if (res->parent != &iomem_resource) {
pr_info("HMEM: skip %pr, already claimed\n", res);
return 0;
}
hmem_register_device(phys_to_target_node(res->start), res);
return 0;
}
static __init int hmem_init(void)
{
walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
return 0;
}
/*
* As this is a fallback for address ranges unclaimed by the ACPI HMAT
* parsing it must be at an initcall level greater than hmat_init().
*/
late_initcall(hmem_init);

View File

@@ -3,30 +3,39 @@
#include <linux/memregion.h>
#include <linux/module.h>
#include <linux/pfn_t.h>
#include "bus.h"
#include "../bus.h"
static bool region_idle;
module_param_named(region_idle, region_idle, bool, 0644);
static int dax_hmem_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct dev_pagemap pgmap = { };
struct dax_region *dax_region;
struct memregion_info *mri;
struct dev_dax_data data;
struct dev_dax *dev_dax;
struct resource *res;
struct range range;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
return -ENOMEM;
mri = dev->platform_data;
memcpy(&pgmap.res, res, sizeof(*res));
dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node,
PMD_SIZE, PFN_DEV|PFN_MAP);
range.start = res->start;
range.end = res->end;
dax_region = alloc_dax_region(dev, pdev->id, &range, mri->target_node,
PMD_SIZE, 0);
if (!dax_region)
return -ENOMEM;
dev_dax = devm_create_dev_dax(dax_region, 0, &pgmap);
data = (struct dev_dax_data) {
.dax_region = dax_region,
.id = -1,
.size = region_idle ? 0 : resource_size(res),
};
dev_dax = devm_create_dev_dax(&data);
if (IS_ERR(dev_dax))
return PTR_ERR(dev_dax);

View File

@@ -19,17 +19,28 @@ static const char *kmem_name;
/* Set if any memory will remain added when the driver will be unloaded. */
static bool any_hotremove_failed;
int dev_dax_kmem_probe(struct device *dev)
static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
{
struct dev_dax *dev_dax = to_dev_dax(dev);
struct resource *res = &dev_dax->region->res;
resource_size_t kmem_start;
resource_size_t kmem_size;
resource_size_t kmem_end;
struct resource *new_res;
const char *new_res_name;
struct dev_dax_range *dax_range = &dev_dax->ranges[i];
struct range *range = &dax_range->range;
/* memory-block align the hotplug range */
r->start = ALIGN(range->start, memory_block_size_bytes());
r->end = ALIGN_DOWN(range->end + 1, memory_block_size_bytes()) - 1;
if (r->start >= r->end) {
r->start = range->start;
r->end = range->end;
return -ENOSPC;
}
return 0;
}
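dax_kmem_range() above trims a dev_dax mapping so that only whole memory blocks are hotplugged. A worked example of that arithmetic as a standalone program, assuming a 128 MiB block size (memory_block_size_bytes() is the real authority at runtime) and invented addresses:
#include <stdint.h>
#include <stdio.h>

#define BLOCK		(128ULL << 20)			/* assumed block size */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* power-of-two only  */
#define ALIGN_DN(x, a)	((x) & ~((a) - 1))

int main(void)
{
	/* invented dev_dax range, unaligned at both ends */
	uint64_t start = 0x240000010ULL, end = 0x2500000ffULL;
	uint64_t hp_start = ALIGN_UP(start, BLOCK);
	uint64_t hp_end = ALIGN_DN(end + 1, BLOCK) - 1;

	if (hp_start >= hp_end)
		printf("too small after alignment\n");
	else	/* prints 0x248000000-0x24fffffff: exactly one 128 MiB block */
		printf("hotplug %#llx-%#llx\n",
		       (unsigned long long)hp_start,
		       (unsigned long long)hp_end);
	return 0;
}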
static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
{
struct device *dev = &dev_dax->dev;
int i, mapped = 0;
char *res_name;
int numa_node;
int rc;
/*
* Ensure good NUMA information for the persistent memory.
@@ -39,31 +50,39 @@ int dev_dax_kmem_probe(struct device *dev)
*/
numa_node = dev_dax->target_node;
if (numa_node < 0) {
dev_warn(dev, "rejecting DAX region %pR with invalid node: %d\n",
res, numa_node);
dev_warn(dev, "rejecting DAX region with invalid node: %d\n",
numa_node);
return -EINVAL;
}
/* Hotplug starting at the beginning of the next block: */
kmem_start = ALIGN(res->start, memory_block_size_bytes());
kmem_size = resource_size(res);
/* Adjust the size down to compensate for moving up kmem_start: */
kmem_size -= kmem_start - res->start;
/* Align the size down to cover only complete blocks: */
kmem_size &= ~(memory_block_size_bytes() - 1);
kmem_end = kmem_start + kmem_size;
new_res_name = kstrdup(dev_name(dev), GFP_KERNEL);
if (!new_res_name)
res_name = kstrdup(dev_name(dev), GFP_KERNEL);
if (!res_name)
return -ENOMEM;
for (i = 0; i < dev_dax->nr_range; i++) {
struct resource *res;
struct range range;
int rc;
rc = dax_kmem_range(dev_dax, i, &range);
if (rc) {
dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
i, range.start, range.end);
continue;
}
/* Region is permanently reserved if hotremove fails. */
new_res = request_mem_region(kmem_start, kmem_size, new_res_name);
if (!new_res) {
dev_warn(dev, "could not reserve region [%pa-%pa]\n",
&kmem_start, &kmem_end);
kfree(new_res_name);
res = request_mem_region(range.start, range_len(&range), res_name);
if (!res) {
dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n",
i, range.start, range.end);
/*
* Once some memory has been onlined we can't
* assume that it can be un-onlined safely.
*/
if (mapped)
continue;
kfree(res_name);
return -EBUSY;
}
@@ -73,34 +92,38 @@ int dev_dax_kmem_probe(struct device *dev)
* inherit flags from the parent since it may set new flags
* unknown to us that will break add_memory() below.
*/
new_res->flags = IORESOURCE_SYSTEM_RAM;
res->flags = IORESOURCE_SYSTEM_RAM;
/*
* Ensure that future kexec'd kernels will not treat this as RAM
* automatically.
* Ensure that future kexec'd kernels will not treat
* this as RAM automatically.
*/
rc = add_memory_driver_managed(numa_node, new_res->start,
resource_size(new_res), kmem_name);
rc = add_memory_driver_managed(numa_node, range.start,
range_len(&range), kmem_name);
if (rc) {
release_resource(new_res);
kfree(new_res);
kfree(new_res_name);
dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
i, range.start, range.end);
release_mem_region(range.start, range_len(&range));
if (mapped)
continue;
kfree(res_name);
return rc;
}
dev_dax->dax_kmem_res = new_res;
mapped++;
}
dev_set_drvdata(dev, res_name);
return 0;
}
#ifdef CONFIG_MEMORY_HOTREMOVE
static int dev_dax_kmem_remove(struct device *dev)
static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
{
struct dev_dax *dev_dax = to_dev_dax(dev);
struct resource *res = dev_dax->dax_kmem_res;
resource_size_t kmem_start = res->start;
resource_size_t kmem_size = resource_size(res);
const char *res_name = res->name;
int rc;
int i, success = 0;
struct device *dev = &dev_dax->dev;
const char *res_name = dev_get_drvdata(dev);
/*
* We have one shot for removing memory, if some memory blocks were not
@@ -108,25 +131,36 @@ static int dev_dax_kmem_remove(struct device *dev)
* there is no way to hotremove this memory until reboot because device
* unbind will succeed even if we return failure.
*/
rc = remove_memory(dev_dax->target_node, kmem_start, kmem_size);
if (rc) {
for (i = 0; i < dev_dax->nr_range; i++) {
struct range range;
int rc;
rc = dax_kmem_range(dev_dax, i, &range);
if (rc)
continue;
rc = remove_memory(dev_dax->target_node, range.start,
range_len(&range));
if (rc == 0) {
release_mem_region(range.start, range_len(&range));
success++;
continue;
}
any_hotremove_failed = true;
dev_err(dev,
"DAX region %pR cannot be hotremoved until the next reboot\n",
res);
return rc;
"mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n",
i, range.start, range.end);
}
/* Release and free dax resources */
release_resource(res);
kfree(res);
if (success >= dev_dax->nr_range) {
kfree(res_name);
dev_dax->dax_kmem_res = NULL;
dev_set_drvdata(dev, NULL);
}
return 0;
}
#else
static int dev_dax_kmem_remove(struct device *dev)
static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
{
/*
* Without hotremove purposely leak the request_mem_region() for the
@@ -141,10 +175,8 @@ static int dev_dax_kmem_remove(struct device *dev)
#endif /* CONFIG_MEMORY_HOTREMOVE */
static struct dax_device_driver device_dax_kmem_driver = {
.drv = {
.probe = dev_dax_kmem_probe,
.remove = dev_dax_kmem_remove,
},
};
static int __init dax_kmem_init(void)

View File

@@ -22,7 +22,7 @@ static int dax_pmem_compat_probe(struct device *dev)
return -ENOMEM;
device_lock(&dev_dax->dev);
rc = dev_dax_probe(&dev_dax->dev);
rc = dev_dax_probe(dev_dax);
device_unlock(&dev_dax->dev);
devres_close_group(&dev_dax->dev, dev_dax);

View File

@@ -9,11 +9,12 @@
struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
{
struct resource res;
struct range range;
int rc, id, region_id;
resource_size_t offset;
struct nd_pfn_sb *pfn_sb;
struct dev_dax *dev_dax;
struct dev_dax_data data;
struct nd_namespace_io *nsio;
struct dax_region *dax_region;
struct dev_pagemap pgmap = { };
@@ -49,16 +50,23 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
if (rc != 2)
return ERR_PTR(-EINVAL);
/* adjust the dax_region resource to the start of data */
memcpy(&res, &pgmap.res, sizeof(res));
res.start += offset;
dax_region = alloc_dax_region(dev, region_id, &res,
/* adjust the dax_region range to the start of data */
range = pgmap.range;
range.start += offset;
dax_region = alloc_dax_region(dev, region_id, &range,
nd_region->target_node, le32_to_cpu(pfn_sb->align),
PFN_DEV|PFN_MAP);
IORESOURCE_DAX_STATIC);
if (!dax_region)
return ERR_PTR(-ENOMEM);
dev_dax = __devm_create_dev_dax(dax_region, id, &pgmap, subsys);
data = (struct dev_dax_data) {
.dax_region = dax_region,
.id = id,
.pgmap = &pgmap,
.subsys = subsys,
.size = range_len(&range),
};
dev_dax = devm_create_dev_dax(&data);
/* child dev_dax instances now own the lifetime of the dax_region */
dax_region_put(dax_region);

View File

@@ -38,7 +38,7 @@ void __init efi_fake_memmap_early(void)
m_start = mem->range.start;
m_end = mem->range.end;
for_each_efi_memory_desc(md) {
u64 start, end;
u64 start, end, size;
if (md->type != EFI_CONVENTIONAL_MEMORY)
continue;
@@ -58,11 +58,17 @@ void __init efi_fake_memmap_early(void)
*/
start = max(start, m_start);
end = min(end, m_end);
size = end - start + 1;
if (end <= start)
continue;
e820__range_update(start, end - start + 1, E820_TYPE_RAM,
E820_TYPE_SOFT_RESERVED);
/*
* Ensure each efi_fake_mem instance results in
* a unique e820 resource
*/
e820__range_remove(start, size, E820_TYPE_RAM, 1);
e820__range_add(start, size, E820_TYPE_SOFT_RESERVED);
e820__update_table(e820_table);
}
}
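The clamp above (max of the starts, min of the ends, skip when empty) is plain inclusive-range intersection performed before the e820 carve-out. A standalone check of that step with invented numbers (the e820__* calls are kernel-internal and not reproduced here):
#include <stdint.h>
#include <stdio.h>

/* Intersect two inclusive ranges; returns the overlap size, or 0 when
 * end <= start, mirroring the conservative check in the hunk above. */
static uint64_t intersect(uint64_t a_start, uint64_t a_end,
			  uint64_t b_start, uint64_t b_end,
			  uint64_t *start, uint64_t *end)
{
	*start = a_start > b_start ? a_start : b_start;	/* max(starts) */
	*end = a_end < b_end ? a_end : b_end;		/* min(ends)   */
	if (*end <= *start)
		return 0;
	return *end - *start + 1;
}

int main(void)
{
	uint64_t s, e;
	/* invented EFI descriptor vs efi_fake_mem range */
	uint64_t size = intersect(0x100000000ULL, 0x1ffffffffULL,
				  0x180000000ULL, 0x2ffffffffULL, &s, &e);

	/* prints 0x180000000-0x1ffffffff, 2147483648 bytes */
	printf("carve %#llx-%#llx (%llu bytes) to E820_TYPE_SOFT_RESERVED\n",
	       (unsigned long long)s, (unsigned long long)e,
	       (unsigned long long)size);
	return 0;
}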

View File

@@ -258,8 +258,8 @@ shmem_writeback(struct drm_i915_gem_object *obj)
for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
struct page *page;
page = find_lock_entry(mapping, i);
if (!page || xa_is_value(page))
page = find_lock_page(mapping, i);
if (!page)
continue;
if (!page_mapped(page) && clear_page_dirty_for_io(page)) {

View File

@@ -101,7 +101,7 @@ unsigned long nouveau_dmem_page_addr(struct page *page)
{
struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) -
chunk->pagemap.res.start;
chunk->pagemap.range.start;
return chunk->bo->offset + off;
}
@@ -249,7 +249,9 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
chunk->drm = drm;
chunk->pagemap.type = MEMORY_DEVICE_PRIVATE;
chunk->pagemap.res = *res;
chunk->pagemap.range.start = res->start;
chunk->pagemap.range.end = res->end;
chunk->pagemap.nr_range = 1;
chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
chunk->pagemap.owner = drm->dev;
@@ -273,7 +275,7 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
list_add(&chunk->list, &drm->dmem->chunks);
mutex_unlock(&drm->dmem->mutex);
pfn_first = chunk->pagemap.res.start >> PAGE_SHIFT;
pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT;
page = pfn_to_page(pfn_first);
spin_lock(&drm->dmem->lock);
for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) {
@@ -294,8 +296,7 @@ out_bo_unpin:
out_bo_free:
nouveau_bo_ref(NULL, &chunk->bo);
out_release:
release_mem_region(chunk->pagemap.res.start,
resource_size(&chunk->pagemap.res));
release_mem_region(chunk->pagemap.range.start, range_len(&chunk->pagemap.range));
out_free:
kfree(chunk);
out:
@@ -382,8 +383,8 @@ nouveau_dmem_fini(struct nouveau_drm *drm)
nouveau_bo_ref(NULL, &chunk->bo);
list_del(&chunk->list);
memunmap_pages(&chunk->pagemap);
release_mem_region(chunk->pagemap.res.start,
resource_size(&chunk->pagemap.res));
release_mem_region(chunk->pagemap.range.start,
range_len(&chunk->pagemap.range));
kfree(chunk);
}

View File

@@ -2198,7 +2198,7 @@ static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size)
addr_end = addr + size - 1;
for_each_reserved_mem_region(i, &start, &end) {
for_each_reserved_mem_range(i, &start, &end) {
if (addr >= start && addr_end <= end)
return true;
}

View File

@@ -211,7 +211,7 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
}
static void badblocks_populate(struct badrange *badrange,
struct badblocks *bb, const struct resource *res)
struct badblocks *bb, const struct range *range)
{
struct badrange_entry *bre;
@@ -222,34 +222,34 @@ static void badblocks_populate(struct badrange *badrange,
u64 bre_end = bre->start + bre->length - 1;
/* Discard intervals with no intersection */
if (bre_end < res->start)
if (bre_end < range->start)
continue;
if (bre->start > res->end)
if (bre->start > range->end)
continue;
/* Deal with any overlap after start of the namespace */
if (bre->start >= res->start) {
if (bre->start >= range->start) {
u64 start = bre->start;
u64 len;
if (bre_end <= res->end)
if (bre_end <= range->end)
len = bre->length;
else
len = res->start + resource_size(res)
len = range->start + range_len(range)
- bre->start;
__add_badblock_range(bb, start - res->start, len);
__add_badblock_range(bb, start - range->start, len);
continue;
}
/*
* Deal with overlap for badrange starting before
* the namespace.
*/
if (bre->start < res->start) {
if (bre->start < range->start) {
u64 len;
if (bre_end < res->end)
len = bre->start + bre->length - res->start;
if (bre_end < range->end)
len = bre->start + bre->length - range->start;
else
len = resource_size(res);
len = range_len(range);
__add_badblock_range(bb, 0, len);
}
}
@@ -267,7 +267,7 @@ static void badblocks_populate(struct badrange *badrange,
* and add badblocks entries for all matching sub-ranges
*/
void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res)
struct badblocks *bb, const struct range *range)
{
struct nvdimm_bus *nvdimm_bus;
@@ -279,7 +279,7 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
nvdimm_bus_lock(&nvdimm_bus->dev);
badblocks_populate(&nvdimm_bus->badrange, bb, res);
badblocks_populate(&nvdimm_bus->badrange, bb, range);
nvdimm_bus_unlock(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
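badblocks_populate() above reduces each badrange entry to an (offset, length) pair relative to the namespace range, handling entries that start before versus inside the namespace separately. A quick numeric sanity check of that translation with invented addresses:
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* invented inclusive namespace range and badrange entry */
	uint64_t ns_start = 0x1000, ns_end = 0x1fff;	/* 4 KiB window  */
	uint64_t bre_start = 0x0f00, bre_len = 0x300;	/* 0x0f00-0x11ff */
	uint64_t bre_end = bre_start + bre_len - 1;
	uint64_t off, len;

	if (bre_end < ns_start || bre_start > ns_end)
		return 0;				/* no intersection */

	if (bre_start >= ns_start) {			/* starts inside the namespace */
		off = bre_start - ns_start;
		len = bre_end <= ns_end ? bre_len
					: ns_start + (ns_end - ns_start + 1) - bre_start;
	} else {					/* starts before the namespace */
		off = 0;
		len = bre_end < ns_end ? bre_start + bre_len - ns_start
				       : ns_end - ns_start + 1;
	}

	/* expected: offset 0x0, length 0x200 (the clipped span 0x1000-0x11ff) */
	printf("badblocks at offset %#llx, length %#llx\n",
	       (unsigned long long)off, (unsigned long long)len);
	return 0;
}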

Some files were not shown because too many files have changed in this diff.