Merge d5660df4a5 ("Merge branch 'akpm' (patches from Andrew)") into android-mainline

steps on the way to 5.10-rc1

Change-Id: Iddc84c25b6a9d71fa8542b927d6f69c364131c3d
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Greg Kroah-Hartman
2020-10-23 13:08:50 +02:00
261 changed files with 9144 additions and 4313 deletions


@@ -203,11 +203,13 @@ ForEachMacros:
- 'for_each_matching_node' - 'for_each_matching_node'
- 'for_each_matching_node_and_match' - 'for_each_matching_node_and_match'
- 'for_each_member' - 'for_each_member'
- 'for_each_memblock' - 'for_each_mem_region'
- 'for_each_memblock_type' - 'for_each_memblock_type'
- 'for_each_memcg_cache_index' - 'for_each_memcg_cache_index'
- 'for_each_mem_pfn_range' - 'for_each_mem_pfn_range'
- '__for_each_mem_range'
- 'for_each_mem_range' - 'for_each_mem_range'
- '__for_each_mem_range_rev'
- 'for_each_mem_range_rev' - 'for_each_mem_range_rev'
- 'for_each_migratetype_order' - 'for_each_migratetype_order'
- 'for_each_msi_entry' - 'for_each_msi_entry'
@@ -271,6 +273,7 @@ ForEachMacros:
- 'for_each_registered_fb' - 'for_each_registered_fb'
- 'for_each_requested_gpio' - 'for_each_requested_gpio'
- 'for_each_requested_gpio_in_range' - 'for_each_requested_gpio_in_range'
- 'for_each_reserved_mem_range'
- 'for_each_reserved_mem_region' - 'for_each_reserved_mem_region'
- 'for_each_rtd_codec_dais' - 'for_each_rtd_codec_dais'
- 'for_each_rtd_codec_dais_rollback' - 'for_each_rtd_codec_dais_rollback'


@@ -15,7 +15,7 @@ Description:
actual protection), and Android and Linux distributions have been actual protection), and Android and Linux distributions have been
explicitly writing a "0" to /sys/fs/selinux/checkreqprot during explicitly writing a "0" to /sys/fs/selinux/checkreqprot during
initialization for some time. Support for setting checkreqprot to 1 initialization for some time. Support for setting checkreqprot to 1
will be removed in a future kernel release, at which point the kernel will be removed no sooner than June 2021, at which point the kernel
will always cease using checkreqprot internally and will always will always cease using checkreqprot internally and will always
check the actual protections being applied upon mmap/mprotect calls. check the actual protections being applied upon mmap/mprotect calls.
The checkreqprot selinuxfs node will remain for backward compatibility The checkreqprot selinuxfs node will remain for backward compatibility
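As the note above says, userspace init typically writes a "0" to /sys/fs/selinux/checkreqprot early during boot. A minimal C sketch of that write, assuming selinuxfs is mounted at its usual /sys/fs/selinux location and with error handling kept to a minimum:

/*
 * Minimal sketch, not from the kernel tree: disable checkreqprot during
 * init, assuming selinuxfs is mounted at /sys/fs/selinux.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/fs/selinux/checkreqprot", O_WRONLY);

        if (fd < 0) {
                perror("open checkreqprot");
                return 1;
        }
        if (write(fd, "0", 1) != 1)
                perror("write checkreqprot");
        close(fd);
        return 0;
}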


@@ -1259,6 +1259,10 @@ PAGE_SIZE multiple when read back.
can show up in the middle. Don't rely on items remaining in a can show up in the middle. Don't rely on items remaining in a
fixed position; use the keys to look up specific values! fixed position; use the keys to look up specific values!
If the entry has no per-node counter (or does not show in
memory.numa_stat), we use 'npn' (non-per-node) as the tag
to indicate that it will not show in memory.numa_stat.
anon anon
Amount of memory used in anonymous mappings such as Amount of memory used in anonymous mappings such as
brk(), sbrk(), and mmap(MAP_ANONYMOUS) brk(), sbrk(), and mmap(MAP_ANONYMOUS)
@@ -1270,15 +1274,11 @@ PAGE_SIZE multiple when read back.
kernel_stack kernel_stack
Amount of memory allocated to kernel stacks. Amount of memory allocated to kernel stacks.
slab percpu(npn)
Amount of memory used for storing in-kernel data
structures.
percpu
Amount of memory used for storing per-cpu kernel Amount of memory used for storing per-cpu kernel
data structures. data structures.
sock sock(npn)
Amount of memory used in network transmission buffers Amount of memory used in network transmission buffers
shmem shmem
@@ -1318,11 +1318,9 @@ PAGE_SIZE multiple when read back.
Part of "slab" that cannot be reclaimed on memory Part of "slab" that cannot be reclaimed on memory
pressure. pressure.
pgfault slab(npn)
Total number of page faults incurred Amount of memory used for storing in-kernel data
structures.
pgmajfault
Number of major page faults incurred
workingset_refault_anon workingset_refault_anon
Number of refaults of previously evicted anonymous pages. Number of refaults of previously evicted anonymous pages.
@@ -1348,37 +1346,68 @@ PAGE_SIZE multiple when read back.
workingset_nodereclaim workingset_nodereclaim
Number of times a shadow node has been reclaimed Number of times a shadow node has been reclaimed
pgrefill pgfault(npn)
Total number of page faults incurred
pgmajfault(npn)
Number of major page faults incurred
pgrefill(npn)
Amount of scanned pages (in an active LRU list) Amount of scanned pages (in an active LRU list)
pgscan pgscan(npn)
Amount of scanned pages (in an inactive LRU list) Amount of scanned pages (in an inactive LRU list)
pgsteal pgsteal(npn)
Amount of reclaimed pages Amount of reclaimed pages
pgactivate pgactivate(npn)
Amount of pages moved to the active LRU list Amount of pages moved to the active LRU list
pgdeactivate pgdeactivate(npn)
Amount of pages moved to the inactive LRU list Amount of pages moved to the inactive LRU list
pglazyfree pglazyfree(npn)
Amount of pages postponed to be freed under memory pressure Amount of pages postponed to be freed under memory pressure
pglazyfreed pglazyfreed(npn)
Amount of reclaimed lazyfree pages Amount of reclaimed lazyfree pages
thp_fault_alloc thp_fault_alloc(npn)
Number of transparent hugepages which were allocated to satisfy Number of transparent hugepages which were allocated to satisfy
a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
is not set. is not set.
thp_collapse_alloc thp_collapse_alloc(npn)
Number of transparent hugepages which were allocated to allow Number of transparent hugepages which were allocated to allow
collapsing an existing range of pages. This counter is not collapsing an existing range of pages. This counter is not
present when CONFIG_TRANSPARENT_HUGEPAGE is not set. present when CONFIG_TRANSPARENT_HUGEPAGE is not set.
memory.numa_stat
A read-only nested-keyed file which exists on non-root cgroups.
This breaks down the cgroup's memory footprint into different
types of memory, type-specific details, and other information
per node on the state of the memory management system.
This is useful for providing visibility into the NUMA locality
information within a memcg since the pages are allowed to be
allocated from any physical node. One use case is evaluating
application performance by combining this information with the
application's CPU allocation.
All memory amounts are in bytes.
The output format of memory.numa_stat is::
type N0=<bytes in node 0> N1=<bytes in node 1> ...
The entries are ordered to be human readable, and new entries
can show up in the middle. Don't rely on items remaining in a
fixed position; use the keys to look up specific values!
The entries correspond to the entries described in memory.stat above.
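For illustration, a minimal user-space sketch that dumps memory.numa_stat for one cgroup (the cgroup path here is a made-up example, not part of the ABI):

/* Minimal sketch: print memory.numa_stat for one cgroup.  The cgroup
 * path is an assumption for illustration.
 */
#include <stdio.h>

int main(void)
{
        char line[4096];
        FILE *f = fopen("/sys/fs/cgroup/mygroup/memory.numa_stat", "r");

        if (!f) {
                perror("memory.numa_stat");
                return 1;
        }
        /* Each line is "<type> N0=<bytes> N1=<bytes> ..."; look values up
         * by key, never by position, as the text above warns. */
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
        return 0;
}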
memory.swap.current memory.swap.current
A read-only single value file which exists on non-root A read-only single value file which exists on non-root
cgroups. cgroups.


@@ -170,57 +170,82 @@ document trapinfo
address the kernel panicked. address the kernel panicked.
end end
define dump_log_idx define dump_record
set $idx = $arg0 set var $desc = $arg0
if ($argc > 1) set var $info = $arg1
set $prev_flags = $arg1 if ($argc > 2)
set var $prev_flags = $arg2
else else
set $prev_flags = 0 set var $prev_flags = 0
end
set $msg = ((struct printk_log *) (log_buf + $idx))
set $prefix = 1
set $newline = 1
set $log = log_buf + $idx + sizeof(*$msg)
# prev & LOG_CONT && !(msg->flags & LOG_PREIX)
if (($prev_flags & 8) && !($msg->flags & 4))
set $prefix = 0
end end
# msg->flags & LOG_CONT set var $prefix = 1
if ($msg->flags & 8) set var $newline = 1
set var $begin = $desc->text_blk_lpos.begin % (1U << prb->text_data_ring.size_bits)
set var $next = $desc->text_blk_lpos.next % (1U << prb->text_data_ring.size_bits)
# handle data-less record
if ($begin & 1)
set var $text_len = 0
set var $log = ""
else
# handle wrapping data block
if ($begin > $next)
set var $begin = 0
end
# skip over descriptor id
set var $begin = $begin + sizeof(long)
# handle truncated message
if ($next - $begin < $info->text_len)
set var $text_len = $next - $begin
else
set var $text_len = $info->text_len
end
set var $log = &prb->text_data_ring.data[$begin]
end
# prev & LOG_CONT && !(info->flags & LOG_PREIX)
if (($prev_flags & 8) && !($info->flags & 4))
set var $prefix = 0
end
# info->flags & LOG_CONT
if ($info->flags & 8)
# (prev & LOG_CONT && !(prev & LOG_NEWLINE)) # (prev & LOG_CONT && !(prev & LOG_NEWLINE))
if (($prev_flags & 8) && !($prev_flags & 2)) if (($prev_flags & 8) && !($prev_flags & 2))
set $prefix = 0 set var $prefix = 0
end end
# (!(msg->flags & LOG_NEWLINE)) # (!(info->flags & LOG_NEWLINE))
if (!($msg->flags & 2)) if (!($info->flags & 2))
set $newline = 0 set var $newline = 0
end end
end end
if ($prefix) if ($prefix)
printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000 printf "[%5lu.%06lu] ", $info->ts_nsec / 1000000000, $info->ts_nsec % 1000000000
end end
if ($msg->text_len != 0) if ($text_len)
eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len eval "printf \"%%%d.%ds\", $log", $text_len, $text_len
end end
if ($newline) if ($newline)
printf "\n" printf "\n"
end end
if ($msg->dict_len > 0)
set $dict = $log + $msg->text_len # handle dictionary data
set $idx = 0
set $line = 1 set var $dict = &$info->dev_info.subsystem[0]
while ($idx < $msg->dict_len) set var $dict_len = sizeof($info->dev_info.subsystem)
if ($line) if ($dict[0] != '\0')
printf " " printf " SUBSYSTEM="
set $line = 0 set var $idx = 0
end while ($idx < $dict_len)
set $c = $dict[$idx] set var $c = $dict[$idx]
if ($c == '\0') if ($c == '\0')
printf "\n" loop_break
set $line = 1
else else
if ($c < ' ' || $c >= 127 || $c == '\\') if ($c < ' ' || $c >= 127 || $c == '\\')
printf "\\x%02x", $c printf "\\x%02x", $c
@@ -228,33 +253,67 @@ define dump_log_idx
printf "%c", $c printf "%c", $c
end end
end end
set $idx = $idx + 1 set var $idx = $idx + 1
end
printf "\n"
end
set var $dict = &$info->dev_info.device[0]
set var $dict_len = sizeof($info->dev_info.device)
if ($dict[0] != '\0')
printf " DEVICE="
set var $idx = 0
while ($idx < $dict_len)
set var $c = $dict[$idx]
if ($c == '\0')
loop_break
else
if ($c < ' ' || $c >= 127 || $c == '\\')
printf "\\x%02x", $c
else
printf "%c", $c
end
end
set var $idx = $idx + 1
end end
printf "\n" printf "\n"
end end
end end
document dump_log_idx document dump_record
Dump a single log given its index in the log buffer. The first Dump a single record. The first parameter is the descriptor,
parameter is the index into log_buf, the second is optional and the second parameter is the info, the third parameter is
specified the previous log buffer's flags, used for properly optional and specifies the previous record's flags, used for
formatting continued lines. properly formatting continued lines.
end end
define dmesg define dmesg
set $i = log_first_idx # definitions from kernel/printk/printk_ringbuffer.h
set $end_idx = log_first_idx set var $desc_committed = 1
set $prev_flags = 0 set var $desc_finalized = 2
set var $desc_sv_bits = sizeof(long) * 8
set var $desc_flags_shift = $desc_sv_bits - 2
set var $desc_flags_mask = 3 << $desc_flags_shift
set var $id_mask = ~$desc_flags_mask
set var $desc_count = 1U << prb->desc_ring.count_bits
set var $prev_flags = 0
set var $id = prb->desc_ring.tail_id.counter
set var $end_id = prb->desc_ring.head_id.counter
while (1) while (1)
set $msg = ((struct printk_log *) (log_buf + $i)) set var $desc = &prb->desc_ring.descs[$id % $desc_count]
if ($msg->len == 0) set var $info = &prb->desc_ring.infos[$id % $desc_count]
set $i = 0
else # skip non-committed record
dump_log_idx $i $prev_flags set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift)
set $i = $i + $msg->len if ($state == $desc_committed || $state == $desc_finalized)
set $prev_flags = $msg->flags dump_record $desc $info $prev_flags
set var $prev_flags = $info->flags
end end
if ($i == $end_idx)
set var $id = ($id + 1) & $id_mask
if ($id == $end_id)
loop_break loop_break
end end
end end


@@ -189,50 +189,123 @@ from this.
Free areas descriptor. User-space tools use this value to iterate the Free areas descriptor. User-space tools use this value to iterate the
free_area ranges. MAX_ORDER is used by the zone buddy allocator. free_area ranges. MAX_ORDER is used by the zone buddy allocator.
log_first_idx prb
------------- ---
Index of the first record stored in the buffer log_buf. Used by A pointer to the printk ringbuffer (struct printk_ringbuffer). This
user-space tools to read the strings in the log_buf. may be pointing to the static boot ringbuffer or the dynamically
allocated ringbuffer, depending on when the core dump occurred.
Used by user-space tools to read the active kernel log buffer.
log_buf printk_rb_static
------- ----------------
Console output is written to the ring buffer log_buf at index A pointer to the static boot printk ringbuffer. If @prb has a
log_first_idx. Used to get the kernel log. different value, this is useful for viewing the initial boot messages,
which may have been overwritten in the dynamically allocated
ringbuffer.
log_buf_len clear_seq
-----------
log_buf's length.
clear_idx
--------- ---------
The index that the next printk() record to read after the last clear The sequence number of the printk() record after the last clear
command. It indicates the first record after the last SYSLOG_ACTION command. It indicates the first record after the last
_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump SYSLOG_ACTION_CLEAR, like issued by 'dmesg -c'. Used by user-space
the dmesg log. tools to dump a subset of the dmesg log.
log_next_idx printk_ringbuffer
------------ -----------------
The index of the next record to store in the buffer log_buf. Used to The size of a printk_ringbuffer structure. This structure contains all
compute the index of the current buffer position. information required for accessing the various components of the
kernel log buffer.
printk_log (printk_ringbuffer, desc_ring|text_data_ring|dict_data_ring|fail)
---------- -----------------------------------------------------------------
The size of a structure printk_log. Used to compute the size of Offsets for the various components of the printk ringbuffer. Used by
messages, and extract dmesg log. It encapsulates header information for user-space tools to view the kernel log buffer without requiring the
log_buf, such as timestamp, syslog level, etc. declaration of the structure.
(printk_log, ts_nsec|len|text_len|dict_len) prb_desc_ring
------------------------------------------- -------------
It represents field offsets in struct printk_log. User space tools The size of the prb_desc_ring structure. This structure contains
parse it and check whether the values of printk_log's members have been information about the set of record descriptors.
changed.
(prb_desc_ring, count_bits|descs|head_id|tail_id)
-------------------------------------------------
Offsets for the fields describing the set of record descriptors. Used
by user-space tools to be able to traverse the descriptors without
requiring the declaration of the structure.
prb_desc
--------
The size of the prb_desc structure. This structure contains
information about a single record descriptor.
(prb_desc, info|state_var|text_blk_lpos|dict_blk_lpos)
------------------------------------------------------
Offsets for the fields describing a record descriptor. Used by
user-space tools to be able to read descriptors without requiring
the declaration of the structure.
prb_data_blk_lpos
-----------------
The size of the prb_data_blk_lpos structure. This structure contains
information about where the text or dictionary data (data block) is
located within the respective data ring.
(prb_data_blk_lpos, begin|next)
-------------------------------
Offsets for the fields describing the location of a data block. Used
by user-space tools to be able to locate data blocks without
requiring the declaration of the structure.
printk_info
-----------
The size of the printk_info structure. This structure contains all
the meta-data for a record.
(printk_info, seq|ts_nsec|text_len|dict_len|caller_id)
------------------------------------------------------
Offsets for the fields providing the meta-data for a record. Used by
user-space tools to be able to read the information without requiring
the declaration of the structure.
prb_data_ring
-------------
The size of the prb_data_ring structure. This structure contains
information about a set of data blocks.
(prb_data_ring, size_bits|data|head_lpos|tail_lpos)
---------------------------------------------------
Offsets for the fields describing a set of data blocks. Used by
user-space tools to be able to access the data blocks without
requiring the declaration of the structure.
atomic_long_t
-------------
The size of the atomic_long_t structure. Used by user-space tools to
be able to copy the full structure, regardless of its
architecture-specific implementation.
(atomic_long_t, counter)
------------------------
Offset for the long value of an atomic_long_t variable. Used by
user-space tools to access the long value without requiring the
architecture-specific declaration.
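Taken together with the gdb dmesg macro earlier in this commit, these entries are enough for a dump tool to walk the descriptor ring. A hypothetical sketch of the index arithmetic (the helper name is invented; its three inputs would be read out of the vmcore using the entries above):

/* Hypothetical sketch: locate the descriptor that holds record "id",
 * mirroring the arithmetic of the gdb dmesg macro.  descs_base,
 * count_bits and desc_size are values a tool would obtain via the
 * VMCOREINFO entries described above.
 */
#include <stdint.h>

static uint64_t prb_desc_addr(uint64_t descs_base, uint64_t count_bits,
                              uint64_t desc_size, uint64_t id)
{
        uint64_t desc_count = UINT64_C(1) << count_bits;

        /* Descriptors are reused round-robin: index is id modulo count. */
        return descs_base + (id % desc_count) * desc_size;
}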
(free_area.free_list, MIGRATE_TYPES) (free_area.free_list, MIGRATE_TYPES)
------------------------------------ ------------------------------------


@@ -131,7 +131,7 @@ hugepages
parameter is preceded by an invalid hugepagesz parameter, it will parameter is preceded by an invalid hugepagesz parameter, it will
be ignored. be ignored.
default_hugepagesz default_hugepagesz
pecify the default huge page size. This parameter can Specify the default huge page size. This parameter can
only be specified once on the command line. default_hugepagesz can only be specified once on the command line. default_hugepagesz can
optionally be followed by the hugepages parameter to preallocate a optionally be followed by the hugepages parameter to preallocate a
specific number of huge pages of default size. The number of default specific number of huge pages of default size. The number of default


@@ -13,10 +13,10 @@ KASAN uses compile-time instrumentation to insert validity checks before every
memory access, and therefore requires a compiler version that supports that. memory access, and therefore requires a compiler version that supports that.
Generic KASAN is supported in both GCC and Clang. With GCC it requires version Generic KASAN is supported in both GCC and Clang. With GCC it requires version
8.3.0 or later. With Clang it requires version 7.0.0 or later, but detection of 8.3.0 or later. Any supported Clang version is compatible, but detection of
out-of-bounds accesses for global variables is only supported since Clang 11. out-of-bounds accesses for global variables is only supported since Clang 11.
Tag-based KASAN is only supported in Clang and requires version 7.0.0 or later. Tag-based KASAN is only supported in Clang.
Currently generic KASAN is supported for the x86_64, arm64, xtensa, s390 and Currently generic KASAN is supported for the x86_64, arm64, xtensa, s390 and
riscv architectures, and tag-based KASAN is supported only for arm64. riscv architectures, and tag-based KASAN is supported only for arm64.
@@ -281,3 +281,73 @@ unmapped. This will require changes in arch-specific code.
This allows ``VMAP_STACK`` support on x86, and can simplify support of This allows ``VMAP_STACK`` support on x86, and can simplify support of
architectures that do not have a fixed module region. architectures that do not have a fixed module region.
CONFIG_KASAN_KUNIT_TEST & CONFIG_TEST_KASAN_MODULE
--------------------------------------------------
``CONFIG_KASAN_KUNIT_TEST`` utilizes the KUnit Test Framework for testing.
This means each test focuses on a small unit of functionality and
there are a few ways these tests can be run.
Each test will print the KASAN report if an error is detected and then
print the number of the test and the status of the test:
pass::
ok 28 - kmalloc_double_kzfree
or, if kmalloc failed::
# kmalloc_large_oob_right: ASSERTION FAILED at lib/test_kasan.c:163
Expected ptr is not null, but is
not ok 4 - kmalloc_large_oob_right
or, if a KASAN report was expected, but not found::
# kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:629
Expected kasan_data->report_expected == kasan_data->report_found, but
kasan_data->report_expected == 1
kasan_data->report_found == 0
not ok 28 - kmalloc_double_kzfree
All test statuses are tracked as they run and an overall status will
be printed at the end::
ok 1 - kasan
or::
not ok 1 - kasan
(1) Loadable Module
~~~~~~~~~~~~~~~~~~~~
With ``CONFIG_KUNIT`` enabled, ``CONFIG_KASAN_KUNIT_TEST`` can be built as
a loadable module and run on any architecture that supports KASAN
using something like insmod or modprobe. The module is called ``test_kasan``.
(2) Built-In
~~~~~~~~~~~~~
With ``CONFIG_KUNIT`` built-in, ``CONFIG_KASAN_KUNIT_TEST`` can be built-in
on any architecture that supports KASAN. These and any other KUnit
tests enabled will run and print the results at boot as a late-init
call.
(3) Using kunit_tool
~~~~~~~~~~~~~~~~~~~~~
With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, we can also
use kunit_tool to see the results of these along with other KUnit
tests in a more readable way. This will not print the KASAN reports
of tests that passed. See the `KUnit documentation <https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html>`_ for more up-to-date
information on kunit_tool.
.. _KUnit: https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html
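For orientation, a minimal sketch of what a KUnit test case looks like, to show where the "ok N - name" lines above come from (this is not taken from lib/test_kasan.c; the suite and case names are invented):

/* Minimal KUnit test sketch; names are made up for illustration. */
#include <kunit/test.h>

static void example_math_test(struct kunit *test)
{
        KUNIT_EXPECT_EQ(test, 2 + 2, 4);
}

static struct kunit_case example_test_cases[] = {
        KUNIT_CASE(example_math_test),
        {}
};

static struct kunit_suite example_test_suite = {
        .name = "example",
        .test_cases = example_test_cases,
};

kunit_test_suite(example_test_suite);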
``CONFIG_TEST_KASAN_MODULE`` is a set of KASAN tests that could not be
converted to KUnit. These tests can be run only as a module with
``CONFIG_TEST_KASAN_MODULE`` built as a loadable module and
``CONFIG_KASAN`` built-in. The type of error expected and the
function being run are printed before the expression expected to give
an error. Then the error is printed, if found, and that test
should be interpreted to pass only if the error was the one expected
by the test.


@@ -229,7 +229,7 @@ Testing with kmemleak-test
To check if you have all set up to use kmemleak, you can use the kmemleak-test To check if you have all set up to use kmemleak, you can use the kmemleak-test
module, a module that deliberately leaks memory. Set CONFIG_DEBUG_KMEMLEAK_TEST module, a module that deliberately leaks memory. Set CONFIG_DEBUG_KMEMLEAK_TEST
as module (it can't be used as bult-in) and boot the kernel with kmemleak as module (it can't be used as built-in) and boot the kernel with kmemleak
enabled. Load the module and perform a scan with:: enabled. Load the module and perform a scan with::
# modprobe kmemleak-test # modprobe kmemleak-test


@@ -21,6 +21,7 @@ This document describes the Linux kernel Makefiles.
--- 3.10 Special Rules --- 3.10 Special Rules
--- 3.11 $(CC) support functions --- 3.11 $(CC) support functions
--- 3.12 $(LD) support functions --- 3.12 $(LD) support functions
--- 3.13 Script Invocation
=== 4 Host Program support === 4 Host Program support
--- 4.1 Simple Host Program --- 4.1 Simple Host Program
@@ -605,6 +606,25 @@ more details, with real examples.
#Makefile #Makefile
LDFLAGS_vmlinux += $(call ld-option, -X) LDFLAGS_vmlinux += $(call ld-option, -X)
3.13 Script invocation
----------------------
Make rules may invoke scripts to build the kernel. The rules shall
always provide the appropriate interpreter to execute the script. They
shall not rely on the execute bits being set, and shall not invoke the
script directly. For the convenience of manual script invocation, such
as invoking ./scripts/checkpatch.pl, it is recommended to set execute
bits on the scripts nonetheless.
Kbuild provides variables $(CONFIG_SHELL), $(AWK), $(PERL),
$(PYTHON) and $(PYTHON3) to refer to interpreters for the respective
scripts.
Example::
#Makefile
cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
$(KERNELRELEASE)
4 Host Program support 4 Host Program support
====================== ======================


@@ -64,7 +64,7 @@ Active MM
actually get cases where you have a address space that is _only_ used by actually get cases where you have a address space that is _only_ used by
lazy users. That is often a short-lived state, because once that thread lazy users. That is often a short-lived state, because once that thread
gets scheduled away in favour of a real thread, the "zombie" mm gets gets scheduled away in favour of a real thread, the "zombie" mm gets
released because "mm_users" becomes zero. released because "mm_count" becomes zero.
Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any
more. "init_mm" should be considered just a "lazy context when no other more. "init_mm" should be considered just a "lazy context when no other


@@ -173,6 +173,10 @@ NUMA
numa=noacpi numa=noacpi
Don't parse the SRAT table for NUMA setup Don't parse the SRAT table for NUMA setup
numa=nohmat
Don't parse the HMAT table for NUMA setup, or soft-reserved memory
partitioning.
numa=fake=<size>[MG] numa=fake=<size>[MG]
If given as a memory unit, fills all system RAM with nodes of If given as a memory unit, fills all system RAM with nodes of
size interleaved over physical nodes. size interleaved over physical nodes.


@@ -9734,8 +9734,8 @@ M: Catalin Marinas <catalin.marinas@arm.com>
S: Maintained S: Maintained
F: Documentation/dev-tools/kmemleak.rst F: Documentation/dev-tools/kmemleak.rst
F: include/linux/kmemleak.h F: include/linux/kmemleak.h
F: mm/kmemleak-test.c
F: mm/kmemleak.c F: mm/kmemleak.c
F: samples/kmemleak/kmemleak-test.c
KMOD KERNEL MODULE LOADER - USERMODE HELPER KMOD KERNEL MODULE LOADER - USERMODE HELPER
M: Luis Chamberlain <mcgrof@kernel.org> M: Luis Chamberlain <mcgrof@kernel.org>
@@ -13983,6 +13983,7 @@ PRINTK
M: Petr Mladek <pmladek@suse.com> M: Petr Mladek <pmladek@suse.com>
M: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> M: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
R: Steven Rostedt <rostedt@goodmis.org> R: Steven Rostedt <rostedt@goodmis.org>
R: John Ogness <john.ogness@linutronix.de>
S: Maintained S: Maintained
F: include/linux/printk.h F: include/linux/printk.h
F: kernel/printk/ F: kernel/printk/
@@ -15633,6 +15634,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
F: Documentation/ABI/obsolete/sysfs-selinux-checkreqprot F: Documentation/ABI/obsolete/sysfs-selinux-checkreqprot
F: Documentation/ABI/obsolete/sysfs-selinux-disable F: Documentation/ABI/obsolete/sysfs-selinux-disable
F: Documentation/admin-guide/LSM/SELinux.rst F: Documentation/admin-guide/LSM/SELinux.rst
F: include/trace/events/avc.h
F: include/uapi/linux/selinux_netlink.h F: include/uapi/linux/selinux_netlink.h
F: scripts/selinux/ F: scripts/selinux/
F: security/selinux/ F: security/selinux/


@@ -926,15 +926,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized)
# disable invalid "can't wrap" optimizations for signed / pointers # disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
# clang sets -fmerge-all-constants by default as optimization, but this
# is non-conforming behavior for C and in fact breaks the kernel, so we
# need to disable it here generally.
KBUILD_CFLAGS += $(call cc-option,-fno-merge-all-constants)
# for gcc -fno-merge-all-constants disables everything, but it is fine
# to have actual conforming behavior enabled.
KBUILD_CFLAGS += $(call cc-option,-fmerge-constants)
# Make sure -fstack-check isn't enabled (like gentoo apparently did) # Make sure -fstack-check isn't enabled (like gentoo apparently did)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)


@@ -450,10 +450,23 @@ config ARCH_WANT_OLD_COMPAT_IPC
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
bool bool
config HAVE_ARCH_SECCOMP_FILTER config HAVE_ARCH_SECCOMP
bool bool
help
An arch should select this symbol to support seccomp mode 1 (the fixed
syscall policy), and must provide an override for __NR_seccomp_sigreturn,
and compat syscalls if the asm-generic/seccomp.h defaults need adjustment:
- __NR_seccomp_read_32
- __NR_seccomp_write_32
- __NR_seccomp_exit_32
- __NR_seccomp_sigreturn_32
config HAVE_ARCH_SECCOMP_FILTER
bool
select HAVE_ARCH_SECCOMP
help help
An arch should select this symbol if it provides all of these things: An arch should select this symbol if it provides all of these things:
- all the requirements for HAVE_ARCH_SECCOMP
- syscall_get_arch() - syscall_get_arch()
- syscall_get_arguments() - syscall_get_arguments()
- syscall_rollback() - syscall_rollback()
@@ -464,6 +477,23 @@ config HAVE_ARCH_SECCOMP_FILTER
results in the system call being skipped immediately. results in the system call being skipped immediately.
- seccomp syscall wired up - seccomp syscall wired up
config SECCOMP
prompt "Enable seccomp to safely execute untrusted bytecode"
def_bool y
depends on HAVE_ARCH_SECCOMP
help
This kernel feature is useful for number crunching applications
that may need to handle untrusted bytecode during their
execution. By using pipes or other transports made available
to the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in their
own address space using seccomp. Once seccomp is enabled via
prctl(PR_SET_SECCOMP) or the seccomp() syscall, it cannot be
disabled and the task is only allowed to execute a few safe
syscalls defined by each seccomp mode.
If unsure, say Y.
config SECCOMP_FILTER config SECCOMP_FILTER
def_bool y def_bool y
depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET
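The help text above describes seccomp mode 1 behaviour; a minimal user-space sketch of entering it via prctl(PR_SET_SECCOMP), with error handling kept to a minimum:

/* Minimal sketch: enter seccomp strict mode (mode 1) via prctl, as the
 * help text describes.  Afterwards only read/write/exit/sigreturn are
 * permitted; any other syscall terminates the task.
 */
#include <linux/seccomp.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <unistd.h>

int main(void)
{
        if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT) != 0) {
                perror("prctl(PR_SET_SECCOMP)");
                return 1;
        }
        /* From here on, only the few safe syscalls are allowed. */
        write(STDOUT_FILENO, "sandboxed\n", 10);
        _exit(0);
}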


@@ -68,6 +68,7 @@ config ARM
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP
select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT
select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRACEHOOK
@@ -84,7 +85,7 @@ config ARM
select HAVE_FAST_GUP if ARM_LPAE select HAVE_FAST_GUP if ARM_LPAE
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
select HAVE_FUNCTION_TRACER if !XIP_KERNEL && (CC_IS_GCC || CLANG_VERSION >= 100000) select HAVE_FUNCTION_TRACER if !XIP_KERNEL
select HAVE_GCC_PLUGINS select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_IDE if PCI || ISA || PCMCIA
@@ -1618,20 +1619,6 @@ config UACCESS_WITH_MEMCPY
However, if the CPU data cache is using a write-allocate mode, However, if the CPU data cache is using a write-allocate mode,
this option is unlikely to provide any performance gain. this option is unlikely to provide any performance gain.
config SECCOMP
bool
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config PARAVIRT config PARAVIRT
bool "Enable paravirtualization code" bool "Enable paravirtualization code"
help help


@@ -59,6 +59,7 @@ __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
#ifdef CONFIG_ARM_LPAE #ifdef CONFIG_ARM_LPAE
struct page *page = virt_to_page(pmdp); struct page *page = virt_to_page(pmdp);
pgtable_pmd_page_dtor(page);
tlb_remove_table(tlb, page); tlb_remove_table(tlb, page);
#endif #endif
} }


@@ -843,19 +843,25 @@ early_param("mem", early_mem);
static void __init request_standard_resources(const struct machine_desc *mdesc) static void __init request_standard_resources(const struct machine_desc *mdesc)
{ {
struct memblock_region *region; phys_addr_t start, end, res_end;
struct resource *res; struct resource *res;
u64 i;
kernel_code.start = virt_to_phys(_text); kernel_code.start = virt_to_phys(_text);
kernel_code.end = virt_to_phys(__init_begin - 1); kernel_code.end = virt_to_phys(__init_begin - 1);
kernel_data.start = virt_to_phys(_sdata); kernel_data.start = virt_to_phys(_sdata);
kernel_data.end = virt_to_phys(_end - 1); kernel_data.end = virt_to_phys(_end - 1);
for_each_memblock(memory, region) { for_each_mem_range(i, &start, &end) {
phys_addr_t start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
phys_addr_t end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
unsigned long boot_alias_start; unsigned long boot_alias_start;
/*
* In memblock, end points to the first byte after the
* range while in resources, end points to the last byte in
* the range.
*/
res_end = end - 1;
/* /*
* Some systems have a special memory alias which is only * Some systems have a special memory alias which is only
* used for booting. We need to advertise this region to * used for booting. We need to advertise this region to
@@ -869,7 +875,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
__func__, sizeof(*res)); __func__, sizeof(*res));
res->name = "System RAM (boot alias)"; res->name = "System RAM (boot alias)";
res->start = boot_alias_start; res->start = boot_alias_start;
res->end = phys_to_idmap(end); res->end = phys_to_idmap(res_end);
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res); request_resource(&iomem_resource, res);
} }
@@ -880,7 +886,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
sizeof(*res)); sizeof(*res));
res->name = "System RAM"; res->name = "System RAM";
res->start = start; res->start = start;
res->end = end; res->end = res_end;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res); request_resource(&iomem_resource, res);
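The same conversion pattern repeats throughout this commit: open-coded for_each_memblock() loops become for_each_mem_range(), which hands back [start, end) physical ranges and already skips NOMAP regions. A minimal sketch of the new idiom (the function is invented for illustration):

/* Sketch of the iteration idiom the conversions in this commit adopt.
 * for_each_mem_range() yields [start, end) physical ranges and skips
 * MEMBLOCK_NOMAP regions, so the explicit nomap checks in the old
 * loops go away.
 */
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/printk.h>

static void __init walk_memory_ranges(void)
{
        phys_addr_t start, end;
        u64 i;

        for_each_mem_range(i, &start, &end) {
                /* end points to the first byte after the range. */
                pr_info("memory range: %pa..%pa\n", &start, &end);
        }
}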


@@ -299,16 +299,14 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
*/ */
static void __init free_unused_memmap(void) static void __init free_unused_memmap(void)
{ {
unsigned long start, prev_end = 0; unsigned long start, end, prev_end = 0;
struct memblock_region *reg; int i;
/* /*
* This relies on each bank being in address order. * This relies on each bank being in address order.
* The banks are sorted previously in bootmem_init(). * The banks are sorted previously in bootmem_init().
*/ */
for_each_memblock(memory, reg) { for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
start = memblock_region_memory_base_pfn(reg);
#ifdef CONFIG_SPARSEMEM #ifdef CONFIG_SPARSEMEM
/* /*
* Take care not to free memmap entries that don't exist * Take care not to free memmap entries that don't exist
@@ -336,8 +334,7 @@ static void __init free_unused_memmap(void)
* memmap entries are valid from the bank end aligned to * memmap entries are valid from the bank end aligned to
* MAX_ORDER_NR_PAGES. * MAX_ORDER_NR_PAGES.
*/ */
prev_end = ALIGN(memblock_region_memory_end_pfn(reg), prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
MAX_ORDER_NR_PAGES);
} }
#ifdef CONFIG_SPARSEMEM #ifdef CONFIG_SPARSEMEM
@@ -347,61 +344,29 @@ static void __init free_unused_memmap(void)
#endif #endif
} }
#ifdef CONFIG_HIGHMEM
static inline void free_area_high(unsigned long pfn, unsigned long end)
{
for (; pfn < end; pfn++)
free_highmem_page(pfn_to_page(pfn));
}
#endif
static void __init free_highpages(void) static void __init free_highpages(void)
{ {
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn; unsigned long max_low = max_low_pfn;
struct memblock_region *mem, *res; phys_addr_t range_start, range_end;
u64 i;
/* set highmem page free */ /* set highmem page free */
for_each_memblock(memory, mem) { for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
unsigned long start = memblock_region_memory_base_pfn(mem); &range_start, &range_end, NULL) {
unsigned long end = memblock_region_memory_end_pfn(mem); unsigned long start = PHYS_PFN(range_start);
unsigned long end = PHYS_PFN(range_end);
/* Ignore complete lowmem entries */ /* Ignore complete lowmem entries */
if (end <= max_low) if (end <= max_low)
continue; continue;
if (memblock_is_nomap(mem))
continue;
/* Truncate partial highmem entries */ /* Truncate partial highmem entries */
if (start < max_low) if (start < max_low)
start = max_low; start = max_low;
/* Find and exclude any reserved regions */ for (; start < end; start++)
for_each_memblock(reserved, res) { free_highmem_page(pfn_to_page(start));
unsigned long res_start, res_end;
res_start = memblock_region_reserved_base_pfn(res);
res_end = memblock_region_reserved_end_pfn(res);
if (res_end < start)
continue;
if (res_start < start)
res_start = start;
if (res_start > end)
res_start = end;
if (res_end > end)
res_end = end;
if (res_start != start)
free_area_high(start, res_start);
start = res_end;
if (start == end)
break;
}
/* And now free anything which remains */
if (start < end)
free_area_high(start, end);
} }
#endif #endif
} }


@@ -1154,9 +1154,8 @@ phys_addr_t arm_lowmem_limit __initdata = 0;
void __init adjust_lowmem_bounds(void) void __init adjust_lowmem_bounds(void)
{ {
phys_addr_t memblock_limit = 0; phys_addr_t block_start, block_end, memblock_limit = 0;
u64 vmalloc_limit; u64 vmalloc_limit, i;
struct memblock_region *reg;
phys_addr_t lowmem_limit = 0; phys_addr_t lowmem_limit = 0;
/* /*
@@ -1172,26 +1171,18 @@ void __init adjust_lowmem_bounds(void)
* The first usable region must be PMD aligned. Mark its start * The first usable region must be PMD aligned. Mark its start
* as MEMBLOCK_NOMAP if it isn't * as MEMBLOCK_NOMAP if it isn't
*/ */
for_each_memblock(memory, reg) { for_each_mem_range(i, &block_start, &block_end) {
if (!memblock_is_nomap(reg)) { if (!IS_ALIGNED(block_start, PMD_SIZE)) {
if (!IS_ALIGNED(reg->base, PMD_SIZE)) { phys_addr_t len;
phys_addr_t len;
len = round_up(reg->base, PMD_SIZE) - reg->base; len = round_up(block_start, PMD_SIZE) - block_start;
memblock_mark_nomap(reg->base, len); memblock_mark_nomap(block_start, len);
}
break;
} }
break;
} }
for_each_memblock(memory, reg) { for_each_mem_range(i, &block_start, &block_end) {
phys_addr_t block_start = reg->base; if (block_start < vmalloc_limit) {
phys_addr_t block_end = reg->base + reg->size;
if (memblock_is_nomap(reg))
continue;
if (reg->base < vmalloc_limit) {
if (block_end > lowmem_limit) if (block_end > lowmem_limit)
/* /*
* Compare as u64 to ensure vmalloc_limit does * Compare as u64 to ensure vmalloc_limit does
@@ -1440,19 +1431,15 @@ static void __init kmap_init(void)
static void __init map_lowmem(void) static void __init map_lowmem(void)
{ {
struct memblock_region *reg;
phys_addr_t kernel_x_start = round_down(__pa(KERNEL_START), SECTION_SIZE); phys_addr_t kernel_x_start = round_down(__pa(KERNEL_START), SECTION_SIZE);
phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
phys_addr_t start, end;
u64 i;
/* Map all the lowmem memory banks. */ /* Map all the lowmem memory banks. */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
phys_addr_t start = reg->base;
phys_addr_t end = start + reg->size;
struct map_desc map; struct map_desc map;
if (memblock_is_nomap(reg))
continue;
if (end > arm_lowmem_limit) if (end > arm_lowmem_limit)
end = arm_lowmem_limit; end = arm_lowmem_limit;
if (start >= end) if (start >= end)


@@ -231,12 +231,12 @@ static int __init allocate_region(phys_addr_t base, phys_addr_t size,
void __init pmsav7_adjust_lowmem_bounds(void) void __init pmsav7_adjust_lowmem_bounds(void)
{ {
phys_addr_t specified_mem_size = 0, total_mem_size = 0; phys_addr_t specified_mem_size = 0, total_mem_size = 0;
struct memblock_region *reg;
bool first = true;
phys_addr_t mem_start; phys_addr_t mem_start;
phys_addr_t mem_end; phys_addr_t mem_end;
phys_addr_t reg_start, reg_end;
unsigned int mem_max_regions; unsigned int mem_max_regions;
int num, i; int num;
u64 i;
/* Free-up PMSAv7_PROBE_REGION */ /* Free-up PMSAv7_PROBE_REGION */
mpu_min_region_order = __mpu_min_region_order(); mpu_min_region_order = __mpu_min_region_order();
@@ -262,20 +262,19 @@ void __init pmsav7_adjust_lowmem_bounds(void)
mem_max_regions -= num; mem_max_regions -= num;
#endif #endif
for_each_memblock(memory, reg) { for_each_mem_range(i, &reg_start, &reg_end) {
if (first) { if (i == 0) {
phys_addr_t phys_offset = PHYS_OFFSET; phys_addr_t phys_offset = PHYS_OFFSET;
/* /*
* Initially only use memory continuous from * Initially only use memory continuous from
* PHYS_OFFSET */ * PHYS_OFFSET */
if (reg->base != phys_offset) if (reg_start != phys_offset)
panic("First memory bank must be contiguous from PHYS_OFFSET"); panic("First memory bank must be contiguous from PHYS_OFFSET");
mem_start = reg->base; mem_start = reg_start;
mem_end = reg->base + reg->size; mem_end = reg_end;
specified_mem_size = reg->size; specified_mem_size = mem_end - mem_start;
first = false;
} else { } else {
/* /*
* memblock auto merges contiguous blocks, remove * memblock auto merges contiguous blocks, remove
@@ -283,8 +282,8 @@ void __init pmsav7_adjust_lowmem_bounds(void)
* blocks separately while iterating) * blocks separately while iterating)
*/ */
pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n", pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
&mem_end, &reg->base); &mem_end, &reg_start);
memblock_remove(reg->base, 0 - reg->base); memblock_remove(reg_start, 0 - reg_start);
break; break;
} }
} }


@@ -94,20 +94,19 @@ static __init bool is_region_fixed(int number)
void __init pmsav8_adjust_lowmem_bounds(void) void __init pmsav8_adjust_lowmem_bounds(void)
{ {
phys_addr_t mem_end; phys_addr_t mem_end;
struct memblock_region *reg; phys_addr_t reg_start, reg_end;
bool first = true; u64 i;
for_each_memblock(memory, reg) { for_each_mem_range(i, &reg_start, &reg_end) {
if (first) { if (i == 0) {
phys_addr_t phys_offset = PHYS_OFFSET; phys_addr_t phys_offset = PHYS_OFFSET;
/* /*
* Initially only use memory continuous from * Initially only use memory continuous from
* PHYS_OFFSET */ * PHYS_OFFSET */
if (reg->base != phys_offset) if (reg_start != phys_offset)
panic("First memory bank must be contiguous from PHYS_OFFSET"); panic("First memory bank must be contiguous from PHYS_OFFSET");
mem_end = reg->base + reg->size; mem_end = reg_end;
first = false;
} else { } else {
/* /*
* memblock auto merges contiguous blocks, remove * memblock auto merges contiguous blocks, remove
@@ -115,8 +114,8 @@ void __init pmsav8_adjust_lowmem_bounds(void)
* blocks separately while iterating) * blocks separately while iterating)
*/ */
pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n", pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
&mem_end, &reg->base); &mem_end, &reg_start);
memblock_remove(reg->base, 0 - reg->base); memblock_remove(reg_start, 0 - reg_start);
break; break;
} }
} }


@@ -25,11 +25,12 @@
unsigned long xen_get_swiotlb_free_pages(unsigned int order) unsigned long xen_get_swiotlb_free_pages(unsigned int order)
{ {
struct memblock_region *reg; phys_addr_t base;
gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM; gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
u64 i;
for_each_memblock(memory, reg) { for_each_mem_range(i, &base, NULL) {
if (reg->base < (phys_addr_t)0xffffffff) { if (base < (phys_addr_t)0xffffffff) {
if (IS_ENABLED(CONFIG_ZONE_DMA32)) if (IS_ENABLED(CONFIG_ZONE_DMA32))
flags |= __GFP_DMA32; flags |= __GFP_DMA32;
else else


@@ -1041,19 +1041,6 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
config CC_HAVE_SHADOW_CALL_STACK config CC_HAVE_SHADOW_CALL_STACK
def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18) def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config PARAVIRT config PARAVIRT
bool "Enable paravirtualization code" bool "Enable paravirtualization code"
help help
@@ -1612,8 +1599,6 @@ config ARM64_BTI_KERNEL
depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697
depends on !CC_IS_GCC || GCC_VERSION >= 100100 depends on !CC_IS_GCC || GCC_VERSION >= 100100
# https://reviews.llvm.org/rGb8ae3fdfa579dbf366b1bb1cbfdbf8c51db7fa55
depends on !CC_IS_CLANG || CLANG_VERSION >= 100001
depends on !(CC_IS_CLANG && GCOV_KERNEL) depends on !(CC_IS_CLANG && GCOV_KERNEL)
depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
help help


@@ -215,8 +215,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
phys_addr_t start, end; phys_addr_t start, end;
nr_ranges = 1; /* for exclusion of crashkernel region */ nr_ranges = 1; /* for exclusion of crashkernel region */
for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, for_each_mem_range(i, &start, &end)
MEMBLOCK_NONE, &start, &end, NULL)
nr_ranges++; nr_ranges++;
cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
@@ -225,8 +224,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
cmem->max_nr_ranges = nr_ranges; cmem->max_nr_ranges = nr_ranges;
cmem->nr_ranges = 0; cmem->nr_ranges = 0;
for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, for_each_mem_range(i, &start, &end) {
MEMBLOCK_NONE, &start, &end, NULL) {
cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].start = start;
cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->ranges[cmem->nr_ranges].end = end - 1;
cmem->nr_ranges++; cmem->nr_ranges++;


@@ -217,7 +217,7 @@ static void __init request_standard_resources(void)
if (!standard_resources) if (!standard_resources)
panic("%s: Failed to allocate %zu bytes\n", __func__, res_size); panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
for_each_memblock(memory, region) { for_each_mem_region(region) {
res = &standard_resources[i++]; res = &standard_resources[i++];
if (memblock_is_nomap(region)) { if (memblock_is_nomap(region)) {
res->name = "reserved"; res->name = "reserved";
@@ -257,7 +257,7 @@ static int __init reserve_memblock_reserved_regions(void)
if (!memblock_is_region_reserved(mem->start, mem_size)) if (!memblock_is_region_reserved(mem->start, mem_size))
continue; continue;
for_each_reserved_mem_region(j, &r_start, &r_end) { for_each_reserved_mem_range(j, &r_start, &r_end) {
resource_size_t start, end; resource_size_t start, end;
start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start);


@@ -43,13 +43,6 @@ ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
endif endif
# Clang versions less than 8 do not support -mcmodel=tiny
ifeq ($(CONFIG_CC_IS_CLANG), y)
ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
endif
endif
# Disable gcov profiling for VDSO code # Disable gcov profiling for VDSO code
GCOV_PROFILE := n GCOV_PROFILE := n


@@ -471,12 +471,10 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
*/ */
static void __init free_unused_memmap(void) static void __init free_unused_memmap(void)
{ {
unsigned long start, prev_end = 0; unsigned long start, end, prev_end = 0;
struct memblock_region *reg; int i;
for_each_memblock(memory, reg) {
start = __phys_to_pfn(reg->base);
for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
#ifdef CONFIG_SPARSEMEM #ifdef CONFIG_SPARSEMEM
/* /*
* Take care not to free memmap entries that don't exist due * Take care not to free memmap entries that don't exist due
@@ -496,8 +494,7 @@ static void __init free_unused_memmap(void)
* memmap entries are valid from the bank end aligned to * memmap entries are valid from the bank end aligned to
* MAX_ORDER_NR_PAGES. * MAX_ORDER_NR_PAGES.
*/ */
prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size), prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
MAX_ORDER_NR_PAGES);
} }
#ifdef CONFIG_SPARSEMEM #ifdef CONFIG_SPARSEMEM


@@ -212,8 +212,8 @@ void __init kasan_init(void)
{ {
u64 kimg_shadow_start, kimg_shadow_end; u64 kimg_shadow_start, kimg_shadow_end;
u64 mod_shadow_start, mod_shadow_end; u64 mod_shadow_start, mod_shadow_end;
struct memblock_region *reg; phys_addr_t pa_start, pa_end;
int i; u64 i;
kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK; kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK;
kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end)); kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end));
@@ -246,9 +246,9 @@ void __init kasan_init(void)
kasan_populate_early_shadow((void *)mod_shadow_end, kasan_populate_early_shadow((void *)mod_shadow_end,
(void *)kimg_shadow_start); (void *)kimg_shadow_start);
for_each_memblock(memory, reg) { for_each_mem_range(i, &pa_start, &pa_end) {
void *start = (void *)__phys_to_virt(reg->base); void *start = (void *)__phys_to_virt(pa_start);
void *end = (void *)__phys_to_virt(reg->base + reg->size); void *end = (void *)__phys_to_virt(pa_end);
if (start >= end) if (start >= end)
break; break;


@@ -473,8 +473,9 @@ static void __init map_mem(pgd_t *pgdp)
{ {
phys_addr_t kernel_start = __pa_symbol(_text); phys_addr_t kernel_start = __pa_symbol(_text);
phys_addr_t kernel_end = __pa_symbol(__init_begin); phys_addr_t kernel_end = __pa_symbol(__init_begin);
struct memblock_region *reg; phys_addr_t start, end;
int flags = 0; int flags = 0;
u64 i;
if (rodata_full || debug_pagealloc_enabled()) if (rodata_full || debug_pagealloc_enabled())
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
@@ -493,15 +494,9 @@ static void __init map_mem(pgd_t *pgdp)
#endif #endif
/* map all the memory banks */ /* map all the memory banks */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
phys_addr_t start = reg->base;
phys_addr_t end = start + reg->size;
if (start >= end) if (start >= end)
break; break;
if (memblock_is_nomap(reg))
continue;
/* /*
* The linear map must allow allocation tags reading/writing * The linear map must allow allocation tags reading/writing
* if MTE is present. Otherwise, it has the same attributes as * if MTE is present. Otherwise, it has the same attributes as


@@ -354,7 +354,7 @@ static int __init numa_register_nodes(void)
struct memblock_region *mblk; struct memblock_region *mblk;
/* Check that valid nid is set to memblks */ /* Check that valid nid is set to memblks */
for_each_memblock(memory, mblk) { for_each_mem_region(mblk) {
int mblk_nid = memblock_get_region_node(mblk); int mblk_nid = memblock_get_region_node(mblk);
if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) { if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
@@ -427,19 +427,16 @@ out_free_distance:
*/ */
static int __init dummy_numa_init(void) static int __init dummy_numa_init(void)
{ {
phys_addr_t start = memblock_start_of_DRAM();
phys_addr_t end = memblock_end_of_DRAM();
int ret; int ret;
struct memblock_region *mblk;
if (numa_off) if (numa_off)
pr_info("NUMA disabled\n"); /* Forced off on command line. */ pr_info("NUMA disabled\n"); /* Forced off on command line. */
pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1);
memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1);
for_each_memblock(memory, mblk) {
ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
if (!ret)
continue;
ret = numa_add_memblk(0, start, end);
if (ret) {
pr_err("NUMA init failed\n"); pr_err("NUMA init failed\n");
return ret; return ret;
} }


@@ -287,7 +287,8 @@ notrace void __init machine_init(unsigned long dt_ptr)
void __init setup_arch(char **cmdline_p) void __init setup_arch(char **cmdline_p)
{ {
struct memblock_region *reg; phys_addr_t start, end;
u64 i;
printk(KERN_INFO "Initializing kernel\n"); printk(KERN_INFO "Initializing kernel\n");
@@ -351,9 +352,9 @@ void __init setup_arch(char **cmdline_p)
disable_caching(ram_start, ram_end - 1); disable_caching(ram_start, ram_end - 1);
/* Set caching of external RAM used by Linux */ /* Set caching of external RAM used by Linux */
for_each_memblock(memory, reg) for_each_mem_range(i, &start, &end)
enable_caching(CACHE_REGION_START(reg->base), enable_caching(CACHE_REGION_START(start),
CACHE_REGION_START(reg->base + reg->size - 1)); CACHE_REGION_START(end - 1));
#ifdef CONFIG_BLK_DEV_INITRD #ifdef CONFIG_BLK_DEV_INITRD
/* /*


@@ -309,16 +309,3 @@ endmenu
source "arch/csky/Kconfig.platforms" source "arch/csky/Kconfig.platforms"
source "kernel/Kconfig.hz" source "kernel/Kconfig.hz"
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.


@@ -74,17 +74,15 @@ static void __init bootmem_init(void)
memory_end = memory_start = 0; memory_end = memory_start = 0;
/* Find main memory where is the kernel */ /* Find main memory where is the kernel */
for_each_memblock(memory, region) { memory_start = memblock_start_of_DRAM();
memory_start = region->base; memory_end = memblock_end_of_DRAM();
memory_end = region->base + region->size;
}
if (!memory_end) if (!memory_end)
panic("No memory!"); panic("No memory!");
/* setup bootmem globals (we use no_bootmem, but mm still depends on this) */ /* setup bootmem globals (we use no_bootmem, but mm still depends on this) */
min_low_pfn = PFN_UP(memory_start); min_low_pfn = PFN_UP(memory_start);
max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); max_low_pfn = PFN_DOWN(memory_end);
max_pfn = max_low_pfn; max_pfn = max_low_pfn;
memblock_reserve(__pa(_stext), _end - _stext); memblock_reserve(__pa(_stext), _end - _stext);


@@ -26,6 +26,7 @@ config MICROBLAZE
select GENERIC_SCHED_CLOCK select GENERIC_SCHED_CLOCK
select HAVE_ARCH_HASH select HAVE_ARCH_HASH
select HAVE_ARCH_KGDB select HAVE_ARCH_KGDB
select HAVE_ARCH_SECCOMP
select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE
@@ -120,23 +121,6 @@ config CMDLINE_FORCE
Set this to have arguments from the default kernel command string Set this to have arguments from the default kernel command string
override those passed by the boot loader. override those passed by the boot loader.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
endmenu endmenu
menu "Kernel features" menu "Kernel features"


@@ -108,15 +108,15 @@ static void __init paging_init(void)
void __init setup_memory(void) void __init setup_memory(void)
{ {
struct memblock_region *reg;
#ifndef CONFIG_MMU #ifndef CONFIG_MMU
u32 kernel_align_start, kernel_align_size; u32 kernel_align_start, kernel_align_size;
phys_addr_t start, end;
u64 i;
/* Find main memory where is the kernel */ /* Find main memory where is the kernel */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
memory_start = (u32)reg->base; memory_start = start;
lowmem_size = reg->size; lowmem_size = end - start;
if ((memory_start <= (u32)_text) && if ((memory_start <= (u32)_text) &&
((u32)_text <= (memory_start + lowmem_size - 1))) { ((u32)_text <= (memory_start + lowmem_size - 1))) {
memory_size = lowmem_size; memory_size = lowmem_size;
@@ -164,17 +164,6 @@ void __init setup_memory(void)
pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn); pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn);
pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn); pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn);
/* Add active regions with valid PFNs */
for_each_memblock(memory, reg) {
unsigned long start_pfn, end_pfn;
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);
memblock_set_node(start_pfn << PAGE_SHIFT,
(end_pfn - start_pfn) << PAGE_SHIFT,
&memblock.memory, 0);
}
paging_init(); paging_init();
} }


@@ -3006,23 +3006,6 @@ config PHYSICAL_START
specified in the "crashkernel=YM@XM" command line boot parameter specified in the "crashkernel=YM@XM" command line boot parameter
passed to the panic-ed kernel). passed to the panic-ed kernel).
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
config MIPS_O32_FP64_SUPPORT config MIPS_O32_FP64_SUPPORT
bool "Support for O32 binaries using 64-bit FP" if !CPU_MIPSR6 bool "Support for O32 binaries using 64-bit FP" if !CPU_MIPSR6
depends on 32BIT || MIPS32_O32 depends on 32BIT || MIPS32_O32


@@ -190,25 +190,25 @@ char *octeon_swiotlb;
void __init plat_swiotlb_setup(void) void __init plat_swiotlb_setup(void)
{ {
struct memblock_region *mem; phys_addr_t start, end;
phys_addr_t max_addr; phys_addr_t max_addr;
phys_addr_t addr_size; phys_addr_t addr_size;
size_t swiotlbsize; size_t swiotlbsize;
unsigned long swiotlb_nslabs; unsigned long swiotlb_nslabs;
u64 i;
max_addr = 0; max_addr = 0;
addr_size = 0; addr_size = 0;
for_each_memblock(memory, mem) { for_each_mem_range(i, &start, &end) {
/* These addresses map low for PCI. */ /* These addresses map low for PCI. */
if (mem->base > 0x410000000ull && !OCTEON_IS_OCTEON2()) if (start > 0x410000000ull && !OCTEON_IS_OCTEON2())
continue; continue;
addr_size += mem->size; addr_size += (end - start);
if (max_addr < mem->base + mem->size)
max_addr = mem->base + mem->size;
if (max_addr < end)
max_addr = end;
} }
swiotlbsize = PAGE_SIZE; swiotlbsize = PAGE_SIZE;


@@ -300,8 +300,9 @@ static void __init bootmem_init(void)
static void __init bootmem_init(void) static void __init bootmem_init(void)
{ {
struct memblock_region *mem;
phys_addr_t ramstart, ramend; phys_addr_t ramstart, ramend;
phys_addr_t start, end;
u64 i;
ramstart = memblock_start_of_DRAM(); ramstart = memblock_start_of_DRAM();
ramend = memblock_end_of_DRAM(); ramend = memblock_end_of_DRAM();
@@ -338,18 +339,13 @@ static void __init bootmem_init(void)
min_low_pfn = ARCH_PFN_OFFSET; min_low_pfn = ARCH_PFN_OFFSET;
max_pfn = PFN_DOWN(ramend); max_pfn = PFN_DOWN(ramend);
for_each_memblock(memory, mem) { for_each_mem_range(i, &start, &end) {
unsigned long start = memblock_region_memory_base_pfn(mem);
unsigned long end = memblock_region_memory_end_pfn(mem);
/* /*
* Skip highmem here so we get an accurate max_low_pfn if low * Skip highmem here so we get an accurate max_low_pfn if low
* memory stops short of high memory. * memory stops short of high memory.
* If the region overlaps HIGHMEM_START, end is clipped so * If the region overlaps HIGHMEM_START, end is clipped so
* max_pfn excludes the highmem portion. * max_pfn excludes the highmem portion.
*/ */
if (memblock_is_nomap(mem))
continue;
if (start >= PFN_DOWN(HIGHMEM_START)) if (start >= PFN_DOWN(HIGHMEM_START))
continue; continue;
if (end > PFN_DOWN(HIGHMEM_START)) if (end > PFN_DOWN(HIGHMEM_START))
@@ -450,13 +446,12 @@ early_param("memmap", early_parse_memmap);
unsigned long setup_elfcorehdr, setup_elfcorehdr_size; unsigned long setup_elfcorehdr, setup_elfcorehdr_size;
static int __init early_parse_elfcorehdr(char *p) static int __init early_parse_elfcorehdr(char *p)
{ {
struct memblock_region *mem; phys_addr_t start, end;
u64 i;
setup_elfcorehdr = memparse(p, &p); setup_elfcorehdr = memparse(p, &p);
for_each_memblock(memory, mem) { for_each_mem_range(i, &start, &end) {
unsigned long start = mem->base;
unsigned long end = start + mem->size;
if (setup_elfcorehdr >= start && setup_elfcorehdr < end) { if (setup_elfcorehdr >= start && setup_elfcorehdr < end) {
/* /*
* Reserve from the elf core header to the end of * Reserve from the elf core header to the end of
@@ -720,7 +715,8 @@ static void __init arch_mem_init(char **cmdline_p)
static void __init resource_init(void) static void __init resource_init(void)
{ {
struct memblock_region *region; phys_addr_t start, end;
u64 i;
if (UNCAC_BASE != IO_BASE) if (UNCAC_BASE != IO_BASE)
return; return;
@@ -732,9 +728,7 @@ static void __init resource_init(void)
bss_resource.start = __pa_symbol(&__bss_start); bss_resource.start = __pa_symbol(&__bss_start);
bss_resource.end = __pa_symbol(&__bss_stop) - 1; bss_resource.end = __pa_symbol(&__bss_stop) - 1;
for_each_memblock(memory, region) { for_each_mem_range(i, &start, &end) {
phys_addr_t start = PFN_PHYS(memblock_region_memory_base_pfn(region));
phys_addr_t end = PFN_PHYS(memblock_region_memory_end_pfn(region)) - 1;
struct resource *res; struct resource *res;
res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
@@ -743,7 +737,12 @@ static void __init resource_init(void)
sizeof(struct resource)); sizeof(struct resource));
res->start = start; res->start = start;
res->end = end; /*
* In memblock, end points to the first byte after the
* range while in resources, end points to the last byte in
* the range.
*/
res->end = end - 1;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
res->name = "System RAM"; res->name = "System RAM";
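
The comment added here covers the one subtle point in these conversions:
memblock ranges are half-open [start, end) while a struct resource is
inclusive [start, end], hence the end - 1. A hypothetical helper, purely for
illustration:

        static void __init memblock_range_to_resource(phys_addr_t start, phys_addr_t end,
                                                      struct resource *res)
        {
                res->start = start;     /* first byte of the range */
                res->end = end - 1;     /* memblock's end is one past the last byte */
        }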


@@ -70,7 +70,7 @@ static void nlm_fixup_mem(void)
const int pref_backup = 512; const int pref_backup = 512;
struct memblock_region *mem; struct memblock_region *mem;
for_each_memblock(memory, mem) { for_each_mem_region(mem) {
memblock_remove(mem->base + mem->size - pref_backup, memblock_remove(mem->base + mem->size - pref_backup,
pref_backup); pref_backup);
} }
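
Not every caller can move to the start/end form: where the struct
memblock_region itself is still needed, for_each_memblock(memory, ...) becomes
for_each_mem_region() and for_each_memblock(reserved, ...) becomes
for_each_reserved_mem_region(), as here and in the riscv resource_init() and
x86 numa hunks further down. An illustrative sketch; print_region() is a
placeholder:

        static void __init dump_regions(void)
        {
                struct memblock_region *reg;

                for_each_mem_region(reg)                /* was for_each_memblock(memory, reg) */
                        print_region(reg->base, reg->size);

                for_each_reserved_mem_region(reg)       /* was for_each_memblock(reserved, reg) */
                        print_region(reg->base, reg->size);
        }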


@@ -249,12 +249,8 @@ static void __init setup_memory(void)
memory_end = memory_start = 0; memory_end = memory_start = 0;
/* Find main memory where is the kernel */ /* Find main memory where is the kernel */
for_each_memblock(memory, region) { memory_start = memblock_start_of_DRAM();
memory_start = region->base; memory_end = memblock_end_of_DRAM();
memory_end = region->base + region->size;
pr_info("%s: Memory: 0x%x-0x%x\n", __func__,
memory_start, memory_end);
}
if (!memory_end) { if (!memory_end) {
panic("No memory!"); panic("No memory!");


@@ -48,17 +48,12 @@ static void __init setup_memory(void)
unsigned long ram_start_pfn; unsigned long ram_start_pfn;
unsigned long ram_end_pfn; unsigned long ram_end_pfn;
phys_addr_t memory_start, memory_end; phys_addr_t memory_start, memory_end;
struct memblock_region *region;
memory_end = memory_start = 0; memory_end = memory_start = 0;
/* Find main memory where is the kernel, we assume its the only one */ /* Find main memory where is the kernel, we assume its the only one */
for_each_memblock(memory, region) { memory_start = memblock_start_of_DRAM();
memory_start = region->base; memory_end = memblock_end_of_DRAM();
memory_end = region->base + region->size;
printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__,
memory_start, memory_end);
}
if (!memory_end) { if (!memory_end) {
panic("No memory!"); panic("No memory!");


@@ -64,6 +64,7 @@ extern const char _s_kernel_ro[], _e_kernel_ro[];
*/ */
static void __init map_ram(void) static void __init map_ram(void)
{ {
phys_addr_t start, end;
unsigned long v, p, e; unsigned long v, p, e;
pgprot_t prot; pgprot_t prot;
pgd_t *pge; pgd_t *pge;
@@ -71,6 +72,7 @@ static void __init map_ram(void)
pud_t *pue; pud_t *pue;
pmd_t *pme; pmd_t *pme;
pte_t *pte; pte_t *pte;
u64 i;
/* These mark extents of read-only kernel pages... /* These mark extents of read-only kernel pages...
* ...from vmlinux.lds.S * ...from vmlinux.lds.S
*/ */
@@ -78,9 +80,9 @@ static void __init map_ram(void)
v = PAGE_OFFSET; v = PAGE_OFFSET;
for_each_memblock(memory, region) { for_each_mem_range(i, &start, &end) {
p = (u32) region->base & PAGE_MASK; p = (u32) start & PAGE_MASK;
e = p + (u32) region->size; e = (u32) end;
v = (u32) __va(p); v = (u32) __va(p);
pge = pgd_offset_k(v); pge = pgd_offset_k(v);


@@ -378,19 +378,3 @@ endmenu
source "drivers/parisc/Kconfig" source "drivers/parisc/Kconfig"
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.


@@ -946,23 +946,6 @@ config ARCH_WANTS_FREEZER_CONTROL
source "kernel/power/Kconfig" source "kernel/power/Kconfig"
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
config PPC_MEM_KEYS config PPC_MEM_KEYS
prompt "PowerPC Memory Protection Keys" prompt "PowerPC Memory Protection Keys"
def_bool y def_bool y


@@ -191,13 +191,13 @@ int is_fadump_active(void)
*/ */
static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end) static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{ {
struct memblock_region *reg; phys_addr_t reg_start, reg_end;
bool ret = false; bool ret = false;
u64 start, end; u64 i, start, end;
for_each_memblock(memory, reg) { for_each_mem_range(i, &reg_start, &reg_end) {
start = max_t(u64, d_start, reg->base); start = max_t(u64, d_start, reg_start);
end = min_t(u64, d_end, (reg->base + reg->size)); end = min_t(u64, d_end, reg_end);
if (d_start < end) { if (d_start < end) {
/* Memory hole from d_start to start */ /* Memory hole from d_start to start */
if (start > d_start) if (start > d_start)
@@ -422,34 +422,34 @@ static int __init add_boot_mem_regions(unsigned long mstart,
static int __init fadump_get_boot_mem_regions(void) static int __init fadump_get_boot_mem_regions(void)
{ {
unsigned long base, size, cur_size, hole_size, last_end; unsigned long size, cur_size, hole_size, last_end;
unsigned long mem_size = fw_dump.boot_memory_size; unsigned long mem_size = fw_dump.boot_memory_size;
struct memblock_region *reg; phys_addr_t reg_start, reg_end;
int ret = 1; int ret = 1;
u64 i;
fw_dump.boot_mem_regs_cnt = 0; fw_dump.boot_mem_regs_cnt = 0;
last_end = 0; last_end = 0;
hole_size = 0; hole_size = 0;
cur_size = 0; cur_size = 0;
for_each_memblock(memory, reg) { for_each_mem_range(i, &reg_start, &reg_end) {
base = reg->base; size = reg_end - reg_start;
size = reg->size; hole_size += (reg_start - last_end);
hole_size += (base - last_end);
if ((cur_size + size) >= mem_size) { if ((cur_size + size) >= mem_size) {
size = (mem_size - cur_size); size = (mem_size - cur_size);
ret = add_boot_mem_regions(base, size); ret = add_boot_mem_regions(reg_start, size);
break; break;
} }
mem_size -= size; mem_size -= size;
cur_size += size; cur_size += size;
ret = add_boot_mem_regions(base, size); ret = add_boot_mem_regions(reg_start, size);
if (!ret) if (!ret)
break; break;
last_end = base + size; last_end = reg_end;
} }
fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size); fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
@@ -985,9 +985,8 @@ static int fadump_init_elfcore_header(char *bufp)
*/ */
static int fadump_setup_crash_memory_ranges(void) static int fadump_setup_crash_memory_ranges(void)
{ {
struct memblock_region *reg; u64 i, start, end;
u64 start, end; int ret;
int i, ret;
pr_debug("Setup crash memory ranges.\n"); pr_debug("Setup crash memory ranges.\n");
crash_mrange_info.mem_range_cnt = 0; crash_mrange_info.mem_range_cnt = 0;
@@ -1005,10 +1004,7 @@ static int fadump_setup_crash_memory_ranges(void)
return ret; return ret;
} }
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
start = (u64)reg->base;
end = start + (u64)reg->size;
/* /*
* skip the memory chunk that is already added * skip the memory chunk that is already added
* (0 through boot_memory_top). * (0 through boot_memory_top).
@@ -1242,14 +1238,17 @@ static void fadump_free_reserved_memory(unsigned long start_pfn,
*/ */
static void fadump_release_reserved_area(u64 start, u64 end) static void fadump_release_reserved_area(u64 start, u64 end)
{ {
unsigned long reg_spfn, reg_epfn;
u64 tstart, tend, spfn, epfn; u64 tstart, tend, spfn, epfn;
struct memblock_region *reg; int i;
spfn = PHYS_PFN(start); spfn = PHYS_PFN(start);
epfn = PHYS_PFN(end); epfn = PHYS_PFN(end);
for_each_memblock(memory, reg) {
tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg)); for_each_mem_pfn_range(i, MAX_NUMNODES, &reg_spfn, &reg_epfn, NULL) {
tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg)); tstart = max_t(u64, spfn, reg_spfn);
tend = min_t(u64, epfn, reg_epfn);
if (tstart < tend) { if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend); fadump_free_reserved_memory(tstart, tend);
@@ -1684,12 +1683,10 @@ int __init fadump_reserve_mem(void)
/* Preserve everything above the base address */ /* Preserve everything above the base address */
static void __init fadump_reserve_crash_area(u64 base) static void __init fadump_reserve_crash_area(u64 base)
{ {
struct memblock_region *reg; u64 i, mstart, mend, msize;
u64 mstart, msize;
for_each_memblock(memory, reg) { for_each_mem_range(i, &mstart, &mend) {
mstart = reg->base; msize = mend - mstart;
msize = reg->size;
if ((mstart + msize) < base) if ((mstart + msize) < base)
continue; continue;


@@ -138,15 +138,13 @@ out:
*/ */
static int get_crash_memory_ranges(struct crash_mem **mem_ranges) static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
{ {
struct memblock_region *reg; phys_addr_t base, end;
struct crash_mem *tmem; struct crash_mem *tmem;
u64 i;
int ret; int ret;
for_each_memblock(memory, reg) { for_each_mem_range(i, &base, &end) {
u64 base, size; u64 size = end - base;
base = (u64)reg->base;
size = (u64)reg->size;
/* Skip backup memory region, which needs a separate entry */ /* Skip backup memory region, which needs a separate entry */
if (base == BACKUP_SRC_START) { if (base == BACKUP_SRC_START) {
@@ -250,8 +248,7 @@ static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
phys_addr_t start, end; phys_addr_t start, end;
u64 i; u64 i;
for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, for_each_mem_range_rev(i, &start, &end) {
MEMBLOCK_NONE, &start, &end, NULL) {
/* /*
* memblock uses [start, end) convention while it is * memblock uses [start, end) convention while it is
* [start, end] here. Fix the off-by-one to have the * [start, end] here. Fix the off-by-one to have the
@@ -350,8 +347,7 @@ static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
phys_addr_t start, end; phys_addr_t start, end;
u64 i; u64 i;
for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, for_each_mem_range(i, &start, &end) {
MEMBLOCK_NONE, &start, &end, NULL) {
/* /*
* memblock uses [start, end) convention while it is * memblock uses [start, end) convention while it is
* [start, end] here. Fix the off-by-one to have the * [start, end] here. Fix the off-by-one to have the


@@ -95,23 +95,15 @@ EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
void __init kvm_cma_reserve(void) void __init kvm_cma_reserve(void)
{ {
unsigned long align_size; unsigned long align_size;
struct memblock_region *reg; phys_addr_t selected_size;
phys_addr_t selected_size = 0;
/* /*
* We need CMA reservation only when we are in HV mode * We need CMA reservation only when we are in HV mode
*/ */
if (!cpu_has_feature(CPU_FTR_HVMODE)) if (!cpu_has_feature(CPU_FTR_HVMODE))
return; return;
/*
* We cannot use memblock_phys_mem_size() here, because
* memblock_analyze() has not been called yet.
*/
for_each_memblock(memory, reg)
selected_size += memblock_region_memory_end_pfn(reg) -
memblock_region_memory_base_pfn(reg);
selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; selected_size = PAGE_ALIGN(memblock_phys_mem_size() * kvm_cma_resv_ratio / 100);
if (selected_size) { if (selected_size) {
pr_info("%s: reserving %ld MiB for global area\n", __func__, pr_info("%s: reserving %ld MiB for global area\n", __func__,
(unsigned long)selected_size / SZ_1M); (unsigned long)selected_size / SZ_1M);
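
The open-coded sum of per-region page counts is replaced by
memblock_phys_mem_size(), which already returns the total bytes of registered
memory, so the reservation ratio can be applied directly and page-aligned.
Roughly equivalent up to rounding; sketch only, with ratio a percentage such
as kvm_cma_resv_ratio:

        static phys_addr_t __init cma_size_old(unsigned long ratio)
        {
                struct memblock_region *reg;
                unsigned long pages = 0;

                for_each_memblock(memory, reg)
                        pages += memblock_region_memory_end_pfn(reg) -
                                 memblock_region_memory_base_pfn(reg);
                return (phys_addr_t)(pages * ratio / 100) << PAGE_SHIFT;
        }

        static phys_addr_t __init cma_size_new(unsigned long ratio)
        {
                return PAGE_ALIGN(memblock_phys_mem_size() * ratio / 100);
        }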


@@ -687,9 +687,9 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
struct kvmppc_uvmem_page_pvt *pvt; struct kvmppc_uvmem_page_pvt *pvt;
unsigned long pfn_last, pfn_first; unsigned long pfn_last, pfn_first;
pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT; pfn_first = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT;
pfn_last = pfn_first + pfn_last = pfn_first +
(resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT); (range_len(&kvmppc_uvmem_pgmap.range) >> PAGE_SHIFT);
spin_lock(&kvmppc_uvmem_bitmap_lock); spin_lock(&kvmppc_uvmem_bitmap_lock);
bit = find_first_zero_bit(kvmppc_uvmem_bitmap, bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
@@ -1007,7 +1007,7 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
static void kvmppc_uvmem_page_free(struct page *page) static void kvmppc_uvmem_page_free(struct page *page)
{ {
unsigned long pfn = page_to_pfn(page) - unsigned long pfn = page_to_pfn(page) -
(kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT); (kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT);
struct kvmppc_uvmem_page_pvt *pvt; struct kvmppc_uvmem_page_pvt *pvt;
spin_lock(&kvmppc_uvmem_bitmap_lock); spin_lock(&kvmppc_uvmem_bitmap_lock);
@@ -1170,7 +1170,9 @@ int kvmppc_uvmem_init(void)
} }
kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE; kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
kvmppc_uvmem_pgmap.res = *res; kvmppc_uvmem_pgmap.range.start = res->start;
kvmppc_uvmem_pgmap.range.end = res->end;
kvmppc_uvmem_pgmap.nr_range = 1;
kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops; kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
/* just one global instance: */ /* just one global instance: */
kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap; kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap;
@@ -1205,7 +1207,7 @@ void kvmppc_uvmem_free(void)
return; return;
memunmap_pages(&kvmppc_uvmem_pgmap); memunmap_pages(&kvmppc_uvmem_pgmap);
release_mem_region(kvmppc_uvmem_pgmap.res.start, release_mem_region(kvmppc_uvmem_pgmap.range.start,
resource_size(&kvmppc_uvmem_pgmap.res)); range_len(&kvmppc_uvmem_pgmap.range));
kfree(kvmppc_uvmem_bitmap); kfree(kvmppc_uvmem_bitmap);
} }
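
These kvmppc_uvmem hunks track the struct dev_pagemap change in which the
embedded struct resource res becomes a struct range plus nr_range, with
range_len() taking over from resource_size(). A sketch of the initialization
pattern; the function name is made up:

        static void example_pgmap_setup(struct dev_pagemap *pgmap, struct resource *res)
        {
                pgmap->range.start = res->start;
                pgmap->range.end = res->end;    /* inclusive, same convention as a resource */
                pgmap->nr_range = 1;
        }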


@@ -7,7 +7,7 @@
* *
* SMP scalability work: * SMP scalability work:
* Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
* *
* Module name: htab.c * Module name: htab.c
* *
* Description: * Description:
@@ -867,8 +867,8 @@ static void __init htab_initialize(void)
unsigned long table; unsigned long table;
unsigned long pteg_count; unsigned long pteg_count;
unsigned long prot; unsigned long prot;
unsigned long base = 0, size = 0; phys_addr_t base = 0, size = 0, end;
struct memblock_region *reg; u64 i;
DBG(" -> htab_initialize()\n"); DBG(" -> htab_initialize()\n");
@@ -884,7 +884,7 @@ static void __init htab_initialize(void)
/* /*
* Calculate the required size of the htab. We want the number of * Calculate the required size of the htab. We want the number of
* PTEGs to equal one half the number of real pages. * PTEGs to equal one half the number of real pages.
*/ */
htab_size_bytes = htab_get_table_size(); htab_size_bytes = htab_get_table_size();
pteg_count = htab_size_bytes >> 7; pteg_count = htab_size_bytes >> 7;
@@ -894,7 +894,7 @@ static void __init htab_initialize(void)
firmware_has_feature(FW_FEATURE_PS3_LV1)) { firmware_has_feature(FW_FEATURE_PS3_LV1)) {
/* Using a hypervisor which owns the htab */ /* Using a hypervisor which owns the htab */
htab_address = NULL; htab_address = NULL;
_SDR1 = 0; _SDR1 = 0;
#ifdef CONFIG_FA_DUMP #ifdef CONFIG_FA_DUMP
/* /*
* If firmware assisted dump is active firmware preserves * If firmware assisted dump is active firmware preserves
@@ -960,9 +960,9 @@ static void __init htab_initialize(void)
#endif /* CONFIG_DEBUG_PAGEALLOC */ #endif /* CONFIG_DEBUG_PAGEALLOC */
/* create bolted the linear mapping in the hash table */ /* create bolted the linear mapping in the hash table */
for_each_memblock(memory, reg) { for_each_mem_range(i, &base, &end) {
base = (unsigned long)__va(reg->base); size = end - base;
size = reg->size; base = (unsigned long)__va(base);
DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
base, size, prot); base, size, prot);


@@ -329,7 +329,8 @@ static int __meminit create_physical_mapping(unsigned long start,
static void __init radix_init_pgtable(void) static void __init radix_init_pgtable(void)
{ {
unsigned long rts_field; unsigned long rts_field;
struct memblock_region *reg; phys_addr_t start, end;
u64 i;
/* We don't support slb for radix */ /* We don't support slb for radix */
mmu_slb_size = 0; mmu_slb_size = 0;
@@ -337,20 +338,19 @@ static void __init radix_init_pgtable(void)
/* /*
* Create the linear mapping * Create the linear mapping
*/ */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
/* /*
* The memblock allocator is up at this point, so the * The memblock allocator is up at this point, so the
* page tables will be allocated within the range. No * page tables will be allocated within the range. No
* need or a node (which we don't have yet). * need or a node (which we don't have yet).
*/ */
if ((reg->base + reg->size) >= RADIX_VMALLOC_START) { if (end >= RADIX_VMALLOC_START) {
pr_warn("Outside the supported range\n"); pr_warn("Outside the supported range\n");
continue; continue;
} }
WARN_ON(create_physical_mapping(reg->base, WARN_ON(create_physical_mapping(start, end,
reg->base + reg->size,
radix_mem_block_size, radix_mem_block_size,
-1, PAGE_KERNEL)); -1, PAGE_KERNEL));
} }


@@ -138,11 +138,11 @@ void __init kasan_mmu_init(void)
void __init kasan_init(void) void __init kasan_init(void)
{ {
struct memblock_region *reg; phys_addr_t base, end;
u64 i;
for_each_memblock(memory, reg) { for_each_mem_range(i, &base, &end) {
phys_addr_t base = reg->base; phys_addr_t top = min(end, total_lowmem);
phys_addr_t top = min(base + reg->size, total_lowmem);
int ret; int ret;
if (base >= top) if (base >= top)


@@ -184,15 +184,16 @@ void __init initmem_init(void)
/* mark pages that don't exist as nosave */ /* mark pages that don't exist as nosave */
static int __init mark_nonram_nosave(void) static int __init mark_nonram_nosave(void)
{ {
struct memblock_region *reg, *prev = NULL; unsigned long spfn, epfn, prev = 0;
int i;
for_each_memblock(memory, reg) { for_each_mem_pfn_range(i, MAX_NUMNODES, &spfn, &epfn, NULL) {
if (prev && if (prev && prev < spfn)
memblock_region_memory_end_pfn(prev) < memblock_region_memory_base_pfn(reg)) register_nosave_region(prev, spfn);
register_nosave_region(memblock_region_memory_end_pfn(prev),
memblock_region_memory_base_pfn(reg)); prev = epfn;
prev = reg;
} }
return 0; return 0;
} }
#else /* CONFIG_NEED_MULTIPLE_NODES */ #else /* CONFIG_NEED_MULTIPLE_NODES */
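
Where the caller really wants page frame numbers rather than byte addresses,
the open-coded memblock_region_memory_base_pfn()/_end_pfn() pairs become
for_each_mem_pfn_range(), as in the hunk above and in the powerpc numa, s390,
and sh hunks below. Illustrative sketch; handle_pfns() is a placeholder:

        static void __init walk_ram_pfns(void)
        {
                unsigned long start_pfn, end_pfn;
                int i;

                for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL)
                        handle_pfns(start_pfn, end_pfn);        /* end_pfn is exclusive */
        }
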
@@ -584,20 +585,24 @@ void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
*/ */
static int __init add_system_ram_resources(void) static int __init add_system_ram_resources(void)
{ {
struct memblock_region *reg; phys_addr_t start, end;
u64 i;
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
struct resource *res; struct resource *res;
unsigned long base = reg->base;
unsigned long size = reg->size;
res = kzalloc(sizeof(struct resource), GFP_KERNEL); res = kzalloc(sizeof(struct resource), GFP_KERNEL);
WARN_ON(!res); WARN_ON(!res);
if (res) { if (res) {
res->name = "System RAM"; res->name = "System RAM";
res->start = base; res->start = start;
res->end = base + size - 1; /*
* In memblock, end points to the first byte after
* the range while in resources, end points to the
* last byte in the range.
*/
res->end = end - 1;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
WARN_ON(request_resource(&iomem_resource, res) < 0); WARN_ON(request_resource(&iomem_resource, res) < 0);
} }


@@ -804,17 +804,14 @@ static void __init setup_nonnuma(void)
unsigned long total_ram = memblock_phys_mem_size(); unsigned long total_ram = memblock_phys_mem_size();
unsigned long start_pfn, end_pfn; unsigned long start_pfn, end_pfn;
unsigned int nid = 0; unsigned int nid = 0;
struct memblock_region *reg; int i;
printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
top_of_ram, total_ram); top_of_ram, total_ram);
printk(KERN_DEBUG "Memory hole size: %ldMB\n", printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(top_of_ram - total_ram) >> 20); (top_of_ram - total_ram) >> 20);
for_each_memblock(memory, reg) { for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);
fake_numa_create_new_node(end_pfn, &nid); fake_numa_create_new_node(end_pfn, &nid);
memblock_set_node(PFN_PHYS(start_pfn), memblock_set_node(PFN_PHYS(start_pfn),
PFN_PHYS(end_pfn - start_pfn), PFN_PHYS(end_pfn - start_pfn),


@@ -123,11 +123,11 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
void __init mapin_ram(void) void __init mapin_ram(void)
{ {
struct memblock_region *reg; phys_addr_t base, end;
u64 i;
for_each_memblock(memory, reg) { for_each_mem_range(i, &base, &end) {
phys_addr_t base = reg->base; phys_addr_t top = min(end, total_lowmem);
phys_addr_t top = min(base + reg->size, total_lowmem);
if (base >= top) if (base >= top)
continue; continue;


@@ -334,19 +334,6 @@ menu "Kernel features"
source "kernel/Kconfig.hz" source "kernel/Kconfig.hz"
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config RISCV_SBI_V01 config RISCV_SBI_V01
bool "SBI v0.1 support" bool "SBI v0.1 support"
default y default y


@@ -145,21 +145,21 @@ static phys_addr_t dtb_early_pa __initdata;
void __init setup_bootmem(void) void __init setup_bootmem(void)
{ {
struct memblock_region *reg;
phys_addr_t mem_size = 0; phys_addr_t mem_size = 0;
phys_addr_t total_mem = 0; phys_addr_t total_mem = 0;
phys_addr_t mem_start, end = 0; phys_addr_t mem_start, start, end = 0;
phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_end = __pa_symbol(&_end);
phys_addr_t vmlinux_start = __pa_symbol(&_start); phys_addr_t vmlinux_start = __pa_symbol(&_start);
u64 i;
/* Find the memory region containing the kernel */ /* Find the memory region containing the kernel */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
end = reg->base + reg->size; phys_addr_t size = end - start;
if (!total_mem) if (!total_mem)
mem_start = reg->base; mem_start = start;
if (reg->base <= vmlinux_start && vmlinux_end <= end) if (start <= vmlinux_start && vmlinux_end <= end)
BUG_ON(reg->size == 0); BUG_ON(size == 0);
total_mem = total_mem + reg->size; total_mem = total_mem + size;
} }
/* /*
@@ -191,15 +191,6 @@ void __init setup_bootmem(void)
early_init_fdt_scan_reserved_mem(); early_init_fdt_scan_reserved_mem();
memblock_allow_resize(); memblock_allow_resize();
memblock_dump_all(); memblock_dump_all();
for_each_memblock(memory, reg) {
unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
memblock_set_node(PFN_PHYS(start_pfn),
PFN_PHYS(end_pfn - start_pfn),
&memblock.memory, 0);
}
} }
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
@@ -464,7 +455,7 @@ static void __init setup_vm_final(void)
{ {
uintptr_t va, map_size; uintptr_t va, map_size;
phys_addr_t pa, start, end; phys_addr_t pa, start, end;
struct memblock_region *reg; u64 i;
/* Set mmu_enabled flag */ /* Set mmu_enabled flag */
mmu_enabled = true; mmu_enabled = true;
@@ -475,14 +466,9 @@ static void __init setup_vm_final(void)
PGDIR_SIZE, PAGE_TABLE); PGDIR_SIZE, PAGE_TABLE);
/* Map all memory banks */ /* Map all memory banks */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
start = reg->base;
end = start + reg->size;
if (start >= end) if (start >= end)
break; break;
if (memblock_is_nomap(reg))
continue;
if (start <= __pa(PAGE_OFFSET) && if (start <= __pa(PAGE_OFFSET) &&
__pa(PAGE_OFFSET) < end) __pa(PAGE_OFFSET) < end)
start = __pa(PAGE_OFFSET); start = __pa(PAGE_OFFSET);
@@ -545,7 +531,7 @@ static void __init resource_init(void)
{ {
struct memblock_region *region; struct memblock_region *region;
for_each_memblock(memory, region) { for_each_mem_region(region) {
struct resource *res; struct resource *res;
res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);


@@ -85,16 +85,16 @@ static void __init populate(void *start, void *end)
void __init kasan_init(void) void __init kasan_init(void)
{ {
struct memblock_region *reg; phys_addr_t _start, _end;
unsigned long i; u64 i;
kasan_populate_early_shadow((void *)KASAN_SHADOW_START, kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
(void *)kasan_mem_to_shadow((void *) (void *)kasan_mem_to_shadow((void *)
VMALLOC_END)); VMALLOC_END));
for_each_memblock(memory, reg) { for_each_mem_range(i, &_start, &_end) {
void *start = (void *)__va(reg->base); void *start = (void *)_start;
void *end = (void *)__va(reg->base + reg->size); void *end = (void *)_end;
if (start >= end) if (start >= end)
break; break;


@@ -792,23 +792,6 @@ config CRASH_DUMP
endmenu endmenu
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y.
config CCW config CCW
def_bool y def_bool y


@@ -484,8 +484,9 @@ static struct resource __initdata *standard_resources[] = {
static void __init setup_resources(void) static void __init setup_resources(void)
{ {
struct resource *res, *std_res, *sub_res; struct resource *res, *std_res, *sub_res;
struct memblock_region *reg; phys_addr_t start, end;
int j; int j;
u64 i;
code_resource.start = (unsigned long) _text; code_resource.start = (unsigned long) _text;
code_resource.end = (unsigned long) _etext - 1; code_resource.end = (unsigned long) _etext - 1;
@@ -494,7 +495,7 @@ static void __init setup_resources(void)
bss_resource.start = (unsigned long) __bss_start; bss_resource.start = (unsigned long) __bss_start;
bss_resource.end = (unsigned long) __bss_stop - 1; bss_resource.end = (unsigned long) __bss_stop - 1;
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
res = memblock_alloc(sizeof(*res), 8); res = memblock_alloc(sizeof(*res), 8);
if (!res) if (!res)
panic("%s: Failed to allocate %zu bytes align=0x%x\n", panic("%s: Failed to allocate %zu bytes align=0x%x\n",
@@ -502,8 +503,13 @@ static void __init setup_resources(void)
res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM"; res->name = "System RAM";
res->start = reg->base; res->start = start;
res->end = reg->base + reg->size - 1; /*
* In memblock, end points to the first byte after the
* range while in resources, end points to the last byte in
* the range.
*/
res->end = end - 1;
request_resource(&iomem_resource, res); request_resource(&iomem_resource, res);
for (j = 0; j < ARRAY_SIZE(standard_resources); j++) { for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
@@ -776,8 +782,8 @@ static void __init memblock_add_mem_detect_info(void)
unsigned long start, end; unsigned long start, end;
int i; int i;
memblock_dbg("physmem info source: %s (%hhd)\n", pr_debug("physmem info source: %s (%hhd)\n",
get_mem_info_source(), mem_detect.info_source); get_mem_info_source(), mem_detect.info_source);
/* keep memblock lists close to the kernel */ /* keep memblock lists close to the kernel */
memblock_set_bottom_up(true); memblock_set_bottom_up(true);
for_each_mem_detect_block(i, &start, &end) { for_each_mem_detect_block(i, &start, &end) {
@@ -819,14 +825,15 @@ static void __init reserve_kernel(void)
static void __init setup_memory(void) static void __init setup_memory(void)
{ {
struct memblock_region *reg; phys_addr_t start, end;
u64 i;
/* /*
* Init storage key for present memory * Init storage key for present memory
*/ */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end)
storage_key_init_range(reg->base, reg->base + reg->size); storage_key_init_range(start, end);
}
psw_set_key(PAGE_DEFAULT_KEY); psw_set_key(PAGE_DEFAULT_KEY);
/* Only cosmetics */ /* Only cosmetics */


@@ -183,9 +183,9 @@ static void mark_kernel_pgd(void)
void __init cmma_init_nodat(void) void __init cmma_init_nodat(void)
{ {
struct memblock_region *reg;
struct page *page; struct page *page;
unsigned long start, end, ix; unsigned long start, end, ix;
int i;
if (cmma_flag < 2) if (cmma_flag < 2)
return; return;
@@ -193,9 +193,7 @@ void __init cmma_init_nodat(void)
mark_kernel_pgd(); mark_kernel_pgd();
/* Set all kernel pages not used for page tables to stable/no-dat */ /* Set all kernel pages not used for page tables to stable/no-dat */
for_each_memblock(memory, reg) { for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
start = memblock_region_memory_base_pfn(reg);
end = memblock_region_memory_end_pfn(reg);
page = pfn_to_page(start); page = pfn_to_page(start);
for (ix = start; ix < end; ix++, page++) { for (ix = start; ix < end; ix++, page++) {
if (__test_and_clear_bit(PG_arch_1, &page->flags)) if (__test_and_clear_bit(PG_arch_1, &page->flags))


@@ -555,10 +555,11 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
*/ */
void __init vmem_map_init(void) void __init vmem_map_init(void)
{ {
struct memblock_region *reg; phys_addr_t base, end;
u64 i;
for_each_memblock(memory, reg) for_each_mem_range(i, &base, &end)
vmem_add_range(reg->base, reg->size); vmem_add_range(base, end - base);
__set_memory((unsigned long)_stext, __set_memory((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT, (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X); SET_MEMORY_RO | SET_MEMORY_X);


@@ -600,22 +600,6 @@ config PHYSICAL_START
where the fail safe kernel needs to run at a different address where the fail safe kernel needs to run at a different address
than the panic-ed kernel. than the panic-ed kernel.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl, it cannot be disabled and the task is only
allowed to execute a few safe syscalls defined by each seccomp
mode.
If unsure, say N.
config SMP config SMP
bool "Symmetric multi-processing support" bool "Symmetric multi-processing support"
depends on SYS_SUPPORTS_SMP depends on SYS_SUPPORTS_SMP


@@ -226,15 +226,12 @@ void __init allocate_pgdat(unsigned int nid)
static void __init do_init_bootmem(void) static void __init do_init_bootmem(void)
{ {
struct memblock_region *reg; unsigned long start_pfn, end_pfn;
int i;
/* Add active regions with valid PFNs. */ /* Add active regions with valid PFNs. */
for_each_memblock(memory, reg) { for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL)
unsigned long start_pfn, end_pfn;
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);
__add_active_range(0, start_pfn, end_pfn); __add_active_range(0, start_pfn, end_pfn);
}
/* All of system RAM sits in node 0 for the non-NUMA case */ /* All of system RAM sits in node 0 for the non-NUMA case */
allocate_pgdat(0); allocate_pgdat(0);


@@ -23,6 +23,7 @@ config SPARC
select HAVE_OPROFILE select HAVE_OPROFILE
select HAVE_ARCH_KGDB if !SMP || SPARC64 select HAVE_ARCH_KGDB if !SMP || SPARC64
select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_SECCOMP if SPARC64
select HAVE_EXIT_THREAD select HAVE_EXIT_THREAD
select HAVE_PCI select HAVE_PCI
select SYSCTL_EXCEPTION_TRACE select SYSCTL_EXCEPTION_TRACE
@@ -227,23 +228,6 @@ config EARLYFB
help help
Say Y here to enable a faster early framebuffer boot console. Say Y here to enable a faster early framebuffer boot console.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on SPARC64 && PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
config HOTPLUG_CPU config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs" bool "Support for hot-pluggable CPUs"
depends on SPARC64 && SMP depends on SPARC64 && SMP


@@ -1192,18 +1192,14 @@ int of_node_to_nid(struct device_node *dp)
static void __init add_node_ranges(void) static void __init add_node_ranges(void)
{ {
struct memblock_region *reg; phys_addr_t start, end;
unsigned long prev_max; unsigned long prev_max;
u64 i;
memblock_resized: memblock_resized:
prev_max = memblock.memory.max; prev_max = memblock.memory.max;
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
unsigned long size = reg->size;
unsigned long start, end;
start = reg->base;
end = start + size;
while (start < end) { while (start < end) {
unsigned long this_end; unsigned long this_end;
int nid; int nid;
@@ -1211,7 +1207,7 @@ memblock_resized:
this_end = memblock_nid_range(start, end, &nid); this_end = memblock_nid_range(start, end, &nid);
numadbg("Setting memblock NUMA node nid[%d] " numadbg("Setting memblock NUMA node nid[%d] "
"start[%lx] end[%lx]\n", "start[%llx] end[%lx]\n",
nid, start, this_end); nid, start, this_end);
memblock_set_node(start, this_end - start, memblock_set_node(start, this_end - start,


@@ -173,22 +173,6 @@ config PGTABLE_LEVELS
default 3 if 3_LEVEL_PGTABLES default 3 if 3_LEVEL_PGTABLES
default 2 default 2
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y.
config UML_TIME_TRAVEL_SUPPORT config UML_TIME_TRAVEL_SUPPORT
bool bool
prompt "Support time-travel mode (e.g. for test execution)" prompt "Support time-travel mode (e.g. for test execution)"


@@ -1970,22 +1970,6 @@ config EFI_MIXED
If unsure, say N. If unsure, say N.
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
source "kernel/Kconfig.hz" source "kernel/Kconfig.hz"
config KEXEC config KEXEC


@@ -5,15 +5,6 @@
#include "pgtable.h" #include "pgtable.h"
#include "../string.h" #include "../string.h"
/*
* __force_order is used by special_insns.h asm code to force instruction
* serialization.
*
* It is not referenced from the code, but GCC < 5 with -fPIE would fail
* due to an undefined symbol. Define it to make these ancient GCCs work.
*/
unsigned long __force_order;
#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ #define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */


@@ -3,6 +3,7 @@
#define _ASM_X86_NUMA_H #define _ASM_X86_NUMA_H
#include <linux/nodemask.h> #include <linux/nodemask.h>
#include <linux/errno.h>
#include <asm/topology.h> #include <asm/topology.h>
#include <asm/apicdef.h> #include <asm/apicdef.h>
@@ -77,7 +78,12 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable);
#ifdef CONFIG_NUMA_EMU #ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) #define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
void numa_emu_cmdline(char *); int numa_emu_cmdline(char *str);
#else /* CONFIG_NUMA_EMU */
static inline int numa_emu_cmdline(char *str)
{
return -EINVAL;
}
#endif /* CONFIG_NUMA_EMU */ #endif /* CONFIG_NUMA_EMU */
#endif /* _ASM_X86_NUMA_H */ #endif /* _ASM_X86_NUMA_H */


@@ -11,45 +11,47 @@
#include <linux/jump_label.h> #include <linux/jump_label.h>
/*
* Volatile isn't enough to prevent the compiler from reordering the
* read/write functions for the control registers and messing everything up.
* A memory clobber would solve the problem, but would prevent reordering of
* all loads stores around it, which can hurt performance. Solution is to
* use a variable and mimic reads and writes to it to enforce serialization
*/
extern unsigned long __force_order;
/*
* The compiler should not reorder volatile asm statements with respect to each
* other: they should execute in program order. However GCC 4.9.x and 5.x have
* a bug (which was fixed in 8.1, 7.3 and 6.5) where they might reorder
* volatile asm. The write functions are not affected since they have memory
* clobbers preventing reordering. To prevent reads from being reordered with
* respect to writes, use a dummy memory operand.
*/
#define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL)
void native_write_cr0(unsigned long val); void native_write_cr0(unsigned long val);
static inline unsigned long native_read_cr0(void) static inline unsigned long native_read_cr0(void)
{ {
unsigned long val; unsigned long val;
asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); asm volatile("mov %%cr0,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val; return val;
} }
static __always_inline unsigned long native_read_cr2(void) static __always_inline unsigned long native_read_cr2(void)
{ {
unsigned long val; unsigned long val;
asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); asm volatile("mov %%cr2,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val; return val;
} }
static __always_inline void native_write_cr2(unsigned long val) static __always_inline void native_write_cr2(unsigned long val)
{ {
asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); asm volatile("mov %0,%%cr2": : "r" (val) : "memory");
} }
static inline unsigned long __native_read_cr3(void) static inline unsigned long __native_read_cr3(void)
{ {
unsigned long val; unsigned long val;
asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val; return val;
} }
static inline void native_write_cr3(unsigned long val) static inline void native_write_cr3(unsigned long val)
{ {
asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); asm volatile("mov %0,%%cr3": : "r" (val) : "memory");
} }
static inline unsigned long native_read_cr4(void) static inline unsigned long native_read_cr4(void)
@@ -64,10 +66,10 @@ static inline unsigned long native_read_cr4(void)
asm volatile("1: mov %%cr4, %0\n" asm volatile("1: mov %%cr4, %0\n"
"2:\n" "2:\n"
_ASM_EXTABLE(1b, 2b) _ASM_EXTABLE(1b, 2b)
: "=r" (val), "=m" (__force_order) : "0" (0)); : "=r" (val) : "0" (0), __FORCE_ORDER);
#else #else
/* CR4 always exists on x86_64. */ /* CR4 always exists on x86_64. */
asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); asm volatile("mov %%cr4,%0\n\t" : "=r" (val) : __FORCE_ORDER);
#endif #endif
return val; return val;
} }
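
This hunk drops the __force_order dummy variable in favour of the
__FORCE_ORDER dummy memory operand: control register reads take it as an
input, while writes keep a full "memory" clobber, which is enough to stop the
affected GCC 4.9.x/5.x releases from reordering the volatile asm. A sketch of
the resulting pattern; the _sketch names are invented stand-ins for the real
native_read_cr0()/native_write_cr0():

        #define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL)

        static inline unsigned long read_cr0_sketch(void)
        {
                unsigned long val;

                /* dummy memory input: the read cannot be hoisted above earlier writes */
                asm volatile("mov %%cr0,%0" : "=r" (val) : __FORCE_ORDER);
                return val;
        }

        static inline void write_cr0_sketch(unsigned long val)
        {
                /* full memory clobber orders the write against everything */
                asm volatile("mov %0,%%cr0" : : "r" (val) : "memory");
        }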


@@ -418,7 +418,7 @@ do { \
"2:\n" \ "2:\n" \
".section .fixup,\"ax\"\n" \ ".section .fixup,\"ax\"\n" \
"3: mov %[efault],%[errout]\n" \ "3: mov %[efault],%[errout]\n" \
" xor"itype" %[output],%[output]\n" \ " xorl %k[output],%k[output]\n" \
" jmp 2b\n" \ " jmp 2b\n" \
".previous\n" \ ".previous\n" \
_ASM_EXTABLE_UA(1b, 3b) \ _ASM_EXTABLE_UA(1b, 3b) \


@@ -360,7 +360,7 @@ void native_write_cr0(unsigned long val)
unsigned long bits_missing = 0; unsigned long bits_missing = 0;
set_register: set_register:
asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order)); asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
if (static_branch_likely(&cr_pinning)) { if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
@@ -379,7 +379,7 @@ void native_write_cr4(unsigned long val)
unsigned long bits_changed = 0; unsigned long bits_changed = 0;
set_register: set_register:
asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); asm volatile("mov %0,%%cr4": "+r" (val) : : "memory");
if (static_branch_likely(&cr_pinning)) { if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) { if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {


@@ -305,6 +305,20 @@ static int __init cpcompare(const void *a, const void *b)
return (ap->addr != ap->entry->addr) - (bp->addr != bp->entry->addr); return (ap->addr != ap->entry->addr) - (bp->addr != bp->entry->addr);
} }
static bool e820_nomerge(enum e820_type type)
{
/*
* These types may indicate distinct platform ranges aligned to
* numa node, protection domain, performance domain, or other
* boundaries. Do not merge them.
*/
if (type == E820_TYPE_PRAM)
return true;
if (type == E820_TYPE_SOFT_RESERVED)
return true;
return false;
}
int __init e820__update_table(struct e820_table *table) int __init e820__update_table(struct e820_table *table)
{ {
struct e820_entry *entries = table->entries; struct e820_entry *entries = table->entries;
@@ -380,7 +394,7 @@ int __init e820__update_table(struct e820_table *table)
} }
/* Continue building up new map based on this information: */ /* Continue building up new map based on this information: */
if (current_type != last_type || current_type == E820_TYPE_PRAM) { if (current_type != last_type || e820_nomerge(current_type)) {
if (last_type != 0) { if (last_type != 0) {
new_entries[new_nr_entries].size = change_point[chg_idx]->addr - last_addr; new_entries[new_nr_entries].size = change_point[chg_idx]->addr - last_addr;
/* Move forward only if the new size was non-zero: */ /* Move forward only if the new size was non-zero: */
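
The merge predicate is factored into e820_nomerge() so that, besides
E820_TYPE_PRAM, soft-reserved ranges also keep their boundaries when the e820
table is consolidated. For illustration only, a hypothetical wrapper showing
the decision it feeds (not part of the patch):

        static bool e820_needs_new_entry(enum e820_type cur, enum e820_type last)
        {
                return cur != last || e820_nomerge(cur);
        }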


@@ -264,16 +264,12 @@ static void __init relocate_initrd(void)
u64 area_size = PAGE_ALIGN(ramdisk_size); u64 area_size = PAGE_ALIGN(ramdisk_size);
/* We need to move the initrd down into directly mapped mem */ /* We need to move the initrd down into directly mapped mem */
relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), relocated_ramdisk = memblock_phys_alloc_range(area_size, PAGE_SIZE, 0,
area_size, PAGE_SIZE); PFN_PHYS(max_pfn_mapped));
if (!relocated_ramdisk) if (!relocated_ramdisk)
panic("Cannot find place for new RAMDISK of size %lld\n", panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size); ramdisk_size);
/* Note: this includes all the mem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
memblock_reserve(relocated_ramdisk, area_size);
initrd_start = relocated_ramdisk + PAGE_OFFSET; initrd_start = relocated_ramdisk + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size; initrd_end = initrd_start + ramdisk_size;
printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n",
@@ -300,13 +296,13 @@ static void __init early_reserve_initrd(void)
memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
} }
static void __init reserve_initrd(void) static void __init reserve_initrd(void)
{ {
/* Assume only end is not page aligned */ /* Assume only end is not page aligned */
u64 ramdisk_image = get_ramdisk_image(); u64 ramdisk_image = get_ramdisk_image();
u64 ramdisk_size = get_ramdisk_size(); u64 ramdisk_size = get_ramdisk_size();
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
u64 mapped_size;
if (!boot_params.hdr.type_of_loader || if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size) !ramdisk_image || !ramdisk_size)
@@ -314,12 +310,6 @@ static void __init reserve_initrd(void)
initrd_start = 0; initrd_start = 0;
mapped_size = memblock_mem_size(max_pfn_mapped);
if (ramdisk_size >= (mapped_size>>1))
panic("initrd too large to handle, "
"disabling initrd (%lld needed, %lld available)\n",
ramdisk_size, mapped_size>>1);
printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
ramdisk_end - 1); ramdisk_end - 1);
@@ -431,13 +421,13 @@ static int __init reserve_crashkernel_low(void)
{ {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
unsigned long long base, low_base = 0, low_size = 0; unsigned long long base, low_base = 0, low_size = 0;
unsigned long total_low_mem; unsigned long low_mem_limit;
int ret; int ret;
total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX);
/* crashkernel=Y,low */ /* crashkernel=Y,low */
ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); ret = parse_crashkernel_low(boot_command_line, low_mem_limit, &low_size, &base);
if (ret) { if (ret) {
/* /*
* two parts from kernel/dma/swiotlb.c: * two parts from kernel/dma/swiotlb.c:
@@ -455,23 +445,17 @@ static int __init reserve_crashkernel_low(void)
return 0; return 0;
} }
low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
if (!low_base) { if (!low_base) {
pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
(unsigned long)(low_size >> 20)); (unsigned long)(low_size >> 20));
return -ENOMEM; return -ENOMEM;
} }
ret = memblock_reserve(low_base, low_size); pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low RAM limit: %ldMB)\n",
if (ret) {
pr_err("%s: Error reserving crashkernel low memblock.\n", __func__);
return ret;
}
pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
(unsigned long)(low_size >> 20), (unsigned long)(low_size >> 20),
(unsigned long)(low_base >> 20), (unsigned long)(low_base >> 20),
(unsigned long)(total_low_mem >> 20)); (unsigned long)(low_mem_limit >> 20));
crashk_low_res.start = low_base; crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1; crashk_low_res.end = low_base + low_size - 1;
@@ -515,13 +499,13 @@ static void __init reserve_crashkernel(void)
* unless "crashkernel=size[KMG],high" is specified. * unless "crashkernel=size[KMG],high" is specified.
*/ */
if (!high) if (!high)
crash_base = memblock_find_in_range(CRASH_ALIGN, crash_base = memblock_phys_alloc_range(crash_size,
CRASH_ADDR_LOW_MAX, CRASH_ALIGN, CRASH_ALIGN,
crash_size, CRASH_ALIGN); CRASH_ADDR_LOW_MAX);
if (!crash_base) if (!crash_base)
crash_base = memblock_find_in_range(CRASH_ALIGN, crash_base = memblock_phys_alloc_range(crash_size,
CRASH_ADDR_HIGH_MAX, CRASH_ALIGN, CRASH_ALIGN,
crash_size, CRASH_ALIGN); CRASH_ADDR_HIGH_MAX);
if (!crash_base) { if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n"); pr_info("crashkernel reservation failed - No suitable area found.\n");
return; return;
@@ -529,19 +513,13 @@ static void __init reserve_crashkernel(void)
} else { } else {
unsigned long long start; unsigned long long start;
start = memblock_find_in_range(crash_base, start = memblock_phys_alloc_range(crash_size, SZ_1M, crash_base,
crash_base + crash_size, crash_base + crash_size);
crash_size, 1 << 20);
if (start != crash_base) { if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in use.\n"); pr_info("crashkernel reservation failed - memory is in use.\n");
return; return;
} }
} }
ret = memblock_reserve(crash_base, crash_size);
if (ret) {
pr_err("%s: Error reserving crashkernel memblock.\n", __func__);
return;
}
if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
memblock_free(crash_base, crash_size); memblock_free(crash_base, crash_size);


@@ -37,14 +37,12 @@ static __init int numa_setup(char *opt)
return -EINVAL; return -EINVAL;
if (!strncmp(opt, "off", 3)) if (!strncmp(opt, "off", 3))
numa_off = 1; numa_off = 1;
#ifdef CONFIG_NUMA_EMU
if (!strncmp(opt, "fake=", 5)) if (!strncmp(opt, "fake=", 5))
numa_emu_cmdline(opt + 5); return numa_emu_cmdline(opt + 5);
#endif
#ifdef CONFIG_ACPI_NUMA
if (!strncmp(opt, "noacpi", 6)) if (!strncmp(opt, "noacpi", 6))
acpi_numa = -1; disable_srat();
#endif if (!strncmp(opt, "nohmat", 6))
disable_hmat();
return 0; return 0;
} }
early_param("numa", numa_setup); early_param("numa", numa_setup);
@@ -516,7 +514,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
* memory ranges, because quirks such as trim_snb_memory() * memory ranges, because quirks such as trim_snb_memory()
* reserve specific pages for Sandy Bridge graphics. ] * reserve specific pages for Sandy Bridge graphics. ]
*/ */
for_each_memblock(reserved, mb_region) { for_each_reserved_mem_region(mb_region) {
int nid = memblock_get_region_node(mb_region); int nid = memblock_get_region_node(mb_region);
if (nid != MAX_NUMNODES) if (nid != MAX_NUMNODES)
@@ -919,7 +917,6 @@ int phys_to_target_node(phys_addr_t start)
return meminfo_to_nid(&numa_reserved_meminfo, start); return meminfo_to_nid(&numa_reserved_meminfo, start);
} }
EXPORT_SYMBOL_GPL(phys_to_target_node);
int memory_add_physaddr_to_nid(u64 start) int memory_add_physaddr_to_nid(u64 start)
{ {


@@ -13,9 +13,10 @@
static int emu_nid_to_phys[MAX_NUMNODES]; static int emu_nid_to_phys[MAX_NUMNODES];
static char *emu_cmdline __initdata; static char *emu_cmdline __initdata;
void __init numa_emu_cmdline(char *str) int __init numa_emu_cmdline(char *str)
{ {
emu_cmdline = str; emu_cmdline = str;
return 0;
} }
static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi) static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)


@@ -1300,7 +1300,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
* any NUMA information the kernel tries to get from ACPI will * any NUMA information the kernel tries to get from ACPI will
* be meaningless. Prevent it from trying. * be meaningless. Prevent it from trying.
*/ */
acpi_numa = -1; disable_srat();
#endif #endif
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));


@@ -217,20 +217,6 @@ config HOTPLUG_CPU
Say N if you want to disable CPU hotplug. Say N if you want to disable CPU hotplug.
config SECCOMP
bool
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config FAST_SYSCALL_XTENSA config FAST_SYSCALL_XTENSA
bool "Enable fast atomic syscalls" bool "Enable fast atomic syscalls"
default n default n


@@ -79,67 +79,32 @@ void __init zones_init(void)
free_area_init(max_zone_pfn); free_area_init(max_zone_pfn);
} }
#ifdef CONFIG_HIGHMEM
static void __init free_area_high(unsigned long pfn, unsigned long end)
{
for (; pfn < end; pfn++)
free_highmem_page(pfn_to_page(pfn));
}
static void __init free_highpages(void) static void __init free_highpages(void)
{ {
#ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn; unsigned long max_low = max_low_pfn;
struct memblock_region *mem, *res; phys_addr_t range_start, range_end;
u64 i;
reset_all_zones_managed_pages();
/* set highmem page free */ /* set highmem page free */
for_each_memblock(memory, mem) { for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
unsigned long start = memblock_region_memory_base_pfn(mem); &range_start, &range_end, NULL) {
unsigned long end = memblock_region_memory_end_pfn(mem); unsigned long start = PHYS_PFN(range_start);
unsigned long end = PHYS_PFN(range_end);
/* Ignore complete lowmem entries */ /* Ignore complete lowmem entries */
if (end <= max_low) if (end <= max_low)
continue; continue;
if (memblock_is_nomap(mem))
continue;
/* Truncate partial highmem entries */ /* Truncate partial highmem entries */
if (start < max_low) if (start < max_low)
start = max_low; start = max_low;
/* Find and exclude any reserved regions */ for (; start < end; start++)
for_each_memblock(reserved, res) { free_highmem_page(pfn_to_page(start));
unsigned long res_start, res_end;
res_start = memblock_region_reserved_base_pfn(res);
res_end = memblock_region_reserved_end_pfn(res);
if (res_end < start)
continue;
if (res_start < start)
res_start = start;
if (res_start > end)
res_start = end;
if (res_end > end)
res_end = end;
if (res_start != start)
free_area_high(start, res_start);
start = res_end;
if (start == end)
break;
}
/* And now free anything which remains */
if (start < end)
free_area_high(start, end);
} }
}
#else
static void __init free_highpages(void)
{
}
#endif #endif
}
/* /*
* Initialize memory pages. * Initialize memory pages.


@@ -24,8 +24,15 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/node.h> #include <linux/node.h>
#include <linux/sysfs.h> #include <linux/sysfs.h>
#include <linux/dax.h>
static u8 hmat_revision; static u8 hmat_revision;
static int hmat_disable __initdata;
void __init disable_hmat(void)
{
hmat_disable = 1;
}
static LIST_HEAD(targets); static LIST_HEAD(targets);
static LIST_HEAD(initiators); static LIST_HEAD(initiators);
@@ -634,66 +641,6 @@ static void hmat_register_target_perf(struct memory_target *target)
node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0); node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0);
} }
static void hmat_register_target_device(struct memory_target *target,
struct resource *r)
{
/* define a clean / non-busy resource for the platform device */
struct resource res = {
.start = r->start,
.end = r->end,
.flags = IORESOURCE_MEM,
};
struct platform_device *pdev;
struct memregion_info info;
int rc, id;
rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM,
IORES_DESC_SOFT_RESERVED);
if (rc != REGION_INTERSECTS)
return;
id = memregion_alloc(GFP_KERNEL);
if (id < 0) {
pr_err("memregion allocation failure for %pr\n", &res);
return;
}
pdev = platform_device_alloc("hmem", id);
if (!pdev) {
pr_err("hmem device allocation failure for %pr\n", &res);
goto out_pdev;
}
pdev->dev.numa_node = acpi_map_pxm_to_online_node(target->memory_pxm);
info = (struct memregion_info) {
.target_node = acpi_map_pxm_to_node(target->memory_pxm),
};
rc = platform_device_add_data(pdev, &info, sizeof(info));
if (rc < 0) {
pr_err("hmem memregion_info allocation failure for %pr\n", &res);
goto out_pdev;
}
rc = platform_device_add_resources(pdev, &res, 1);
if (rc < 0) {
pr_err("hmem resource allocation failure for %pr\n", &res);
goto out_resource;
}
rc = platform_device_add(pdev);
if (rc < 0) {
dev_err(&pdev->dev, "device add failed for %pr\n", &res);
goto out_resource;
}
return;
out_resource:
put_device(&pdev->dev);
out_pdev:
memregion_free(id);
}
static void hmat_register_target_devices(struct memory_target *target) static void hmat_register_target_devices(struct memory_target *target)
{ {
struct resource *res; struct resource *res;
@@ -705,8 +652,11 @@ static void hmat_register_target_devices(struct memory_target *target)
if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM)) if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
return; return;
for (res = target->memregions.child; res; res = res->sibling) for (res = target->memregions.child; res; res = res->sibling) {
hmat_register_target_device(target, res); int target_nid = acpi_map_pxm_to_node(target->memory_pxm);
hmem_register_device(target_nid, res);
}
} }
static void hmat_register_target(struct memory_target *target) static void hmat_register_target(struct memory_target *target)
@@ -814,7 +764,7 @@ static __init int hmat_init(void)
enum acpi_hmat_type i; enum acpi_hmat_type i;
acpi_status status; acpi_status status;
if (srat_disabled()) if (srat_disabled() || hmat_disable)
return 0; return 0;
status = acpi_get_table(ACPI_SIG_SRAT, 0, &tbl); status = acpi_get_table(ACPI_SIG_SRAT, 0, &tbl);


@@ -27,7 +27,12 @@ static int node_to_pxm_map[MAX_NUMNODES]
= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL }; = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
unsigned char acpi_srat_revision __initdata; unsigned char acpi_srat_revision __initdata;
int acpi_numa __initdata; static int acpi_numa __initdata;
void __init disable_srat(void)
{
acpi_numa = -1;
}
int pxm_to_node(int pxm) int pxm_to_node(int pxm)
{ {
@@ -163,7 +168,7 @@ static int __init slit_valid(struct acpi_table_slit *slit)
void __init bad_srat(void) void __init bad_srat(void)
{ {
pr_err("SRAT: SRAT not used.\n"); pr_err("SRAT: SRAT not used.\n");
acpi_numa = -1; disable_srat();
} }
int __init srat_disabled(void) int __init srat_disabled(void)


@@ -3324,7 +3324,7 @@ struct device *device_find_child_by_name(struct device *parent,
klist_iter_init(&parent->p->klist_children, &i); klist_iter_init(&parent->p->klist_children, &i);
while ((child = next_device(&i))) while ((child = next_device(&i)))
if (!strcmp(dev_name(child), name) && get_device(child)) if (sysfs_streq(dev_name(child), name) && get_device(child))
break; break;
klist_iter_exit(&i); klist_iter_exit(&i);
return child; return child;
@@ -4061,22 +4061,21 @@ void device_shutdown(void)
*/ */
#ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK
static int static void
create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen) set_dev_info(const struct device *dev, struct dev_printk_info *dev_info)
{ {
const char *subsys; const char *subsys;
size_t pos = 0;
memset(dev_info, 0, sizeof(*dev_info));
if (dev->class) if (dev->class)
subsys = dev->class->name; subsys = dev->class->name;
else if (dev->bus) else if (dev->bus)
subsys = dev->bus->name; subsys = dev->bus->name;
else else
return 0; return;
pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys); strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem));
if (pos >= hdrlen)
goto overflow;
/* /*
* Add device identifier DEVICE=: * Add device identifier DEVICE=:
@@ -4092,41 +4091,28 @@ create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen)
c = 'b'; c = 'b';
else else
c = 'c'; c = 'c';
pos++;
pos += snprintf(hdr + pos, hdrlen - pos, snprintf(dev_info->device, sizeof(dev_info->device),
"DEVICE=%c%u:%u", "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt));
c, MAJOR(dev->devt), MINOR(dev->devt));
} else if (strcmp(subsys, "net") == 0) { } else if (strcmp(subsys, "net") == 0) {
struct net_device *net = to_net_dev(dev); struct net_device *net = to_net_dev(dev);
pos++; snprintf(dev_info->device, sizeof(dev_info->device),
pos += snprintf(hdr + pos, hdrlen - pos, "n%u", net->ifindex);
"DEVICE=n%u", net->ifindex);
} else { } else {
pos++; snprintf(dev_info->device, sizeof(dev_info->device),
pos += snprintf(hdr + pos, hdrlen - pos, "+%s:%s", subsys, dev_name(dev));
"DEVICE=+%s:%s", subsys, dev_name(dev));
} }
if (pos >= hdrlen)
goto overflow;
return pos;
overflow:
dev_WARN(dev, "device/subsystem name too long");
return 0;
} }
int dev_vprintk_emit(int level, const struct device *dev, int dev_vprintk_emit(int level, const struct device *dev,
const char *fmt, va_list args) const char *fmt, va_list args)
{ {
char hdr[128]; struct dev_printk_info dev_info;
size_t hdrlen;
hdrlen = create_syslog_header(dev, hdr, sizeof(hdr)); set_dev_info(dev, &dev_info);
return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen, fmt, args); return vprintk_emit(0, level, &dev_info, fmt, args);
} }
EXPORT_SYMBOL(dev_vprintk_emit); EXPORT_SYMBOL(dev_vprintk_emit);


@@ -610,23 +610,23 @@ static unsigned int armada_xp_mbus_win_remap_offset(int win)
static void __init static void __init
mvebu_mbus_find_bridge_hole(uint64_t *start, uint64_t *end) mvebu_mbus_find_bridge_hole(uint64_t *start, uint64_t *end)
{ {
struct memblock_region *r; phys_addr_t reg_start, reg_end;
uint64_t s = 0; uint64_t i, s = 0;
for_each_memblock(memory, r) { for_each_mem_range(i, &reg_start, &reg_end) {
/* /*
* This part of the memory is above 4 GB, so we don't * This part of the memory is above 4 GB, so we don't
* care for the MBus bridge hole. * care for the MBus bridge hole.
*/ */
if (r->base >= 0x100000000ULL) if (reg_start >= 0x100000000ULL)
continue; continue;
/* /*
* The MBus bridge hole is at the end of the RAM under * The MBus bridge hole is at the end of the RAM under
* the 4 GB limit. * the 4 GB limit.
*/ */
if (r->base + r->size > s) if (reg_end > s)
s = r->base + r->size; s = reg_end;
} }
*start = s; *start = s;


@@ -35,6 +35,7 @@ config DEV_DAX_PMEM
config DEV_DAX_HMEM config DEV_DAX_HMEM
tristate "HMEM DAX: direct access to 'specific purpose' memory" tristate "HMEM DAX: direct access to 'specific purpose' memory"
depends on EFI_SOFT_RESERVE depends on EFI_SOFT_RESERVE
select NUMA_KEEP_MEMINFO if (NUMA && X86)
default DEV_DAX default DEV_DAX
help help
EFI 2.8 platforms, and others, may advertise 'specific purpose' EFI 2.8 platforms, and others, may advertise 'specific purpose'
@@ -48,6 +49,11 @@ config DEV_DAX_HMEM
Say M if unsure. Say M if unsure.
config DEV_DAX_HMEM_DEVICES
depends on NUMA_KEEP_MEMINFO # for phys_to_target_node()
depends on DEV_DAX_HMEM && DAX=y
def_bool y
config DEV_DAX_KMEM config DEV_DAX_KMEM
tristate "KMEM DAX: volatile-use of persistent memory" tristate "KMEM DAX: volatile-use of persistent memory"
default DEV_DAX default DEV_DAX


@@ -2,11 +2,10 @@
obj-$(CONFIG_DAX) += dax.o obj-$(CONFIG_DAX) += dax.o
obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX) += device_dax.o
obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o
dax-y := super.o dax-y := super.o
dax-y += bus.o dax-y += bus.o
device_dax-y := device.o device_dax-y := device.o
dax_hmem-y := hmem.o
obj-y += pmem/ obj-y += pmem/
obj-y += hmem/

File diff suppressed because it is too large.


@@ -3,29 +3,33 @@
#ifndef __DAX_BUS_H__ #ifndef __DAX_BUS_H__
#define __DAX_BUS_H__ #define __DAX_BUS_H__
#include <linux/device.h> #include <linux/device.h>
#include <linux/range.h>
struct dev_dax; struct dev_dax;
struct resource; struct resource;
struct dax_device; struct dax_device;
struct dax_region; struct dax_region;
void dax_region_put(struct dax_region *dax_region); void dax_region_put(struct dax_region *dax_region);
#define IORESOURCE_DAX_STATIC (1UL << 0)
struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct resource *res, int target_node, unsigned int align, struct range *range, int target_node, unsigned int align,
unsigned long long flags); unsigned long flags);
enum dev_dax_subsys { enum dev_dax_subsys {
DEV_DAX_BUS, DEV_DAX_BUS = 0, /* zeroed dev_dax_data picks this by default */
DEV_DAX_CLASS, DEV_DAX_CLASS,
}; };
struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id, struct dev_dax_data {
struct dev_pagemap *pgmap, enum dev_dax_subsys subsys); struct dax_region *dax_region;
struct dev_pagemap *pgmap;
enum dev_dax_subsys subsys;
resource_size_t size;
int id;
};
static inline struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
int id, struct dev_pagemap *pgmap)
{
return __devm_create_dev_dax(dax_region, id, pgmap, DEV_DAX_BUS);
}
/* to be deleted when DEV_DAX_CLASS is removed */ /* to be deleted when DEV_DAX_CLASS is removed */
struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys); struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys);
@@ -34,6 +38,8 @@ struct dax_device_driver {
struct device_driver drv; struct device_driver drv;
struct list_head ids; struct list_head ids;
int match_always; int match_always;
int (*probe)(struct dev_dax *dev);
int (*remove)(struct dev_dax *dev);
}; };
int __dax_driver_register(struct dax_device_driver *dax_drv, int __dax_driver_register(struct dax_device_driver *dax_drv,
@@ -44,7 +50,7 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv);
void kill_dev_dax(struct dev_dax *dev_dax); void kill_dev_dax(struct dev_dax *dev_dax);
#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT) #if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
int dev_dax_probe(struct device *dev); int dev_dax_probe(struct dev_dax *dev_dax);
#endif #endif
/* /*


@@ -7,6 +7,7 @@
#include <linux/device.h> #include <linux/device.h>
#include <linux/cdev.h> #include <linux/cdev.h>
#include <linux/idr.h>
/* private routines between core files */ /* private routines between core files */
struct dax_device; struct dax_device;
@@ -22,8 +23,10 @@ void dax_bus_exit(void);
* @kref: to pin while other agents have a need to do lookups * @kref: to pin while other agents have a need to do lookups
* @dev: parent device backing this region * @dev: parent device backing this region
* @align: allocation and mapping alignment for child dax devices * @align: allocation and mapping alignment for child dax devices
* @res: physical address range of the region * @ida: instance id allocator
* @pfn_flags: identify whether the pfns are paged back or not * @res: resource tree to track instance allocations
* @seed: allow userspace to find the first unbound seed device
* @youngest: allow userspace to find the most recently created device
*/ */
struct dax_region { struct dax_region {
int id; int id;
@@ -31,8 +34,16 @@ struct dax_region {
struct kref kref; struct kref kref;
struct device *dev; struct device *dev;
unsigned int align; unsigned int align;
struct ida ida;
struct resource res; struct resource res;
unsigned long long pfn_flags; struct device *seed;
struct device *youngest;
};
struct dax_mapping {
struct device dev;
int range_id;
int id;
}; };
/** /**
@@ -41,22 +52,57 @@ struct dax_region {
* @region - parent region * @region - parent region
* @dax_dev - core dax functionality * @dax_dev - core dax functionality
* @target_node: effective numa node if dev_dax memory range is onlined * @target_node: effective numa node if dev_dax memory range is onlined
* @id: ida allocated id
* @ida: mapping id allocator
* @dev - device core * @dev - device core
* @pgmap - pgmap for memmap setup / lifetime (driver owned) * @pgmap - pgmap for memmap setup / lifetime (driver owned)
* @dax_mem_res: physical address range of hotadded DAX memory * @nr_range: size of @ranges
* @dax_mem_name: name for hotadded DAX memory via add_memory_driver_managed() * @ranges: resource-span + pgoff tuples for the instance
*/ */
struct dev_dax { struct dev_dax {
struct dax_region *region; struct dax_region *region;
struct dax_device *dax_dev; struct dax_device *dax_dev;
unsigned int align;
int target_node; int target_node;
int id;
struct ida ida;
struct device dev; struct device dev;
struct dev_pagemap pgmap; struct dev_pagemap *pgmap;
struct resource *dax_kmem_res; int nr_range;
struct dev_dax_range {
unsigned long pgoff;
struct range range;
struct dax_mapping *mapping;
} *ranges;
}; };
static inline struct dev_dax *to_dev_dax(struct device *dev) static inline struct dev_dax *to_dev_dax(struct device *dev)
{ {
return container_of(dev, struct dev_dax, dev); return container_of(dev, struct dev_dax, dev);
} }
static inline struct dax_mapping *to_dax_mapping(struct device *dev)
{
return container_of(dev, struct dax_mapping, dev);
}
phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline bool dax_align_valid(unsigned long align)
{
if (align == PUD_SIZE && IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
return true;
if (align == PMD_SIZE && has_transparent_hugepage())
return true;
if (align == PAGE_SIZE)
return true;
return false;
}
#else
static inline bool dax_align_valid(unsigned long align)
{
return align == PAGE_SIZE;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif #endif


@@ -17,7 +17,6 @@
static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
const char *func) const char *func)
{ {
struct dax_region *dax_region = dev_dax->region;
struct device *dev = &dev_dax->dev; struct device *dev = &dev_dax->dev;
unsigned long mask; unsigned long mask;
@@ -32,7 +31,7 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
return -EINVAL; return -EINVAL;
} }
mask = dax_region->align - 1; mask = dev_dax->align - 1;
if (vma->vm_start & mask || vma->vm_end & mask) { if (vma->vm_start & mask || vma->vm_end & mask) {
dev_info_ratelimited(dev, dev_info_ratelimited(dev,
"%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n", "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
@@ -41,14 +40,6 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
return -EINVAL; return -EINVAL;
} }
if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV
&& (vma->vm_flags & VM_DONTCOPY) == 0) {
dev_info_ratelimited(dev,
"%s: %s: fail, dax range requires MADV_DONTFORK\n",
current->comm, func);
return -EINVAL;
}
if (!vma_is_dax(vma)) { if (!vma_is_dax(vma)) {
dev_info_ratelimited(dev, dev_info_ratelimited(dev,
"%s: %s: fail, vma is not DAX capable\n", "%s: %s: fail, vma is not DAX capable\n",
@@ -63,15 +54,22 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
__weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
unsigned long size) unsigned long size)
{ {
struct resource *res = &dev_dax->region->res; int i;
phys_addr_t phys;
phys = pgoff * PAGE_SIZE + res->start; for (i = 0; i < dev_dax->nr_range; i++) {
if (phys >= res->start && phys <= res->end) { struct dev_dax_range *dax_range = &dev_dax->ranges[i];
if (phys + size - 1 <= res->end) struct range *range = &dax_range->range;
unsigned long long pgoff_end;
phys_addr_t phys;
pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1;
if (pgoff < dax_range->pgoff || pgoff > pgoff_end)
continue;
phys = PFN_PHYS(pgoff - dax_range->pgoff) + range->start;
if (phys + size - 1 <= range->end)
return phys; return phys;
break;
} }
return -1; return -1;
} }
@@ -79,21 +77,19 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
struct vm_fault *vmf, pfn_t *pfn) struct vm_fault *vmf, pfn_t *pfn)
{ {
struct device *dev = &dev_dax->dev; struct device *dev = &dev_dax->dev;
struct dax_region *dax_region;
phys_addr_t phys; phys_addr_t phys;
unsigned int fault_size = PAGE_SIZE; unsigned int fault_size = PAGE_SIZE;
if (check_vma(dev_dax, vmf->vma, __func__)) if (check_vma(dev_dax, vmf->vma, __func__))
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
dax_region = dev_dax->region; if (dev_dax->align > PAGE_SIZE) {
if (dax_region->align > PAGE_SIZE) {
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
dax_region->align, fault_size); dev_dax->align, fault_size);
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
if (fault_size != dax_region->align) if (fault_size != dev_dax->align)
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE); phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
@@ -102,7 +98,7 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
return vmf_insert_mixed(vmf->vma, vmf->address, *pfn); return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
} }
@@ -112,7 +108,6 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
{ {
unsigned long pmd_addr = vmf->address & PMD_MASK; unsigned long pmd_addr = vmf->address & PMD_MASK;
struct device *dev = &dev_dax->dev; struct device *dev = &dev_dax->dev;
struct dax_region *dax_region;
phys_addr_t phys; phys_addr_t phys;
pgoff_t pgoff; pgoff_t pgoff;
unsigned int fault_size = PMD_SIZE; unsigned int fault_size = PMD_SIZE;
@@ -120,22 +115,15 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
if (check_vma(dev_dax, vmf->vma, __func__)) if (check_vma(dev_dax, vmf->vma, __func__))
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
dax_region = dev_dax->region; if (dev_dax->align > PMD_SIZE) {
if (dax_region->align > PMD_SIZE) {
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
dax_region->align, fault_size); dev_dax->align, fault_size);
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
/* dax pmd mappings require pfn_t_devmap() */ if (fault_size < dev_dax->align)
if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
dev_dbg(dev, "region lacks devmap flags\n");
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} else if (fault_size > dev_dax->align)
if (fault_size < dax_region->align)
return VM_FAULT_SIGBUS;
else if (fault_size > dax_region->align)
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
/* if we are outside of the VMA */ /* if we are outside of the VMA */
@@ -150,7 +138,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
} }
@@ -161,7 +149,6 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
{ {
unsigned long pud_addr = vmf->address & PUD_MASK; unsigned long pud_addr = vmf->address & PUD_MASK;
struct device *dev = &dev_dax->dev; struct device *dev = &dev_dax->dev;
struct dax_region *dax_region;
phys_addr_t phys; phys_addr_t phys;
pgoff_t pgoff; pgoff_t pgoff;
unsigned int fault_size = PUD_SIZE; unsigned int fault_size = PUD_SIZE;
@@ -170,22 +157,15 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
if (check_vma(dev_dax, vmf->vma, __func__)) if (check_vma(dev_dax, vmf->vma, __func__))
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
dax_region = dev_dax->region; if (dev_dax->align > PUD_SIZE) {
if (dax_region->align > PUD_SIZE) {
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
dax_region->align, fault_size); dev_dax->align, fault_size);
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
/* dax pud mappings require pfn_t_devmap() */ if (fault_size < dev_dax->align)
if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
dev_dbg(dev, "region lacks devmap flags\n");
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} else if (fault_size > dev_dax->align)
if (fault_size < dax_region->align)
return VM_FAULT_SIGBUS;
else if (fault_size > dax_region->align)
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
/* if we are outside of the VMA */ /* if we are outside of the VMA */
@@ -200,7 +180,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
} }
@@ -280,9 +260,8 @@ static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr)
{ {
struct file *filp = vma->vm_file; struct file *filp = vma->vm_file;
struct dev_dax *dev_dax = filp->private_data; struct dev_dax *dev_dax = filp->private_data;
struct dax_region *dax_region = dev_dax->region;
if (!IS_ALIGNED(addr, dax_region->align)) if (!IS_ALIGNED(addr, dev_dax->align))
return -EINVAL; return -EINVAL;
return 0; return 0;
} }
@@ -291,9 +270,8 @@ static unsigned long dev_dax_pagesize(struct vm_area_struct *vma)
{ {
struct file *filp = vma->vm_file; struct file *filp = vma->vm_file;
struct dev_dax *dev_dax = filp->private_data; struct dev_dax *dev_dax = filp->private_data;
struct dax_region *dax_region = dev_dax->region;
return dax_region->align; return dev_dax->align;
} }
static const struct vm_operations_struct dax_vm_ops = { static const struct vm_operations_struct dax_vm_ops = {
@@ -332,13 +310,11 @@ static unsigned long dax_get_unmapped_area(struct file *filp,
{ {
unsigned long off, off_end, off_align, len_align, addr_align, align; unsigned long off, off_end, off_align, len_align, addr_align, align;
struct dev_dax *dev_dax = filp ? filp->private_data : NULL; struct dev_dax *dev_dax = filp ? filp->private_data : NULL;
struct dax_region *dax_region;
if (!dev_dax || addr) if (!dev_dax || addr)
goto out; goto out;
dax_region = dev_dax->region; align = dev_dax->align;
align = dax_region->align;
off = pgoff << PAGE_SHIFT; off = pgoff << PAGE_SHIFT;
off_end = off + len; off_end = off + len;
off_align = round_up(off, align); off_align = round_up(off, align);
@@ -412,25 +388,45 @@ static void dev_dax_kill(void *dev_dax)
kill_dev_dax(dev_dax); kill_dev_dax(dev_dax);
} }
int dev_dax_probe(struct device *dev) int dev_dax_probe(struct dev_dax *dev_dax)
{ {
struct dev_dax *dev_dax = to_dev_dax(dev);
struct dax_device *dax_dev = dev_dax->dax_dev; struct dax_device *dax_dev = dev_dax->dax_dev;
struct resource *res = &dev_dax->region->res; struct device *dev = &dev_dax->dev;
struct dev_pagemap *pgmap;
struct inode *inode; struct inode *inode;
struct cdev *cdev; struct cdev *cdev;
void *addr; void *addr;
int rc; int rc, i;
/* 1:1 map region resource range to device-dax instance range */ pgmap = dev_dax->pgmap;
if (!devm_request_mem_region(dev, res->start, resource_size(res), if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
dev_name(dev))) { "static pgmap / multi-range device conflict\n"))
dev_warn(dev, "could not reserve region %pR\n", res); return -EINVAL;
return -EBUSY;
if (!pgmap) {
pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
* (dev_dax->nr_range - 1), GFP_KERNEL);
if (!pgmap)
return -ENOMEM;
pgmap->nr_range = dev_dax->nr_range;
} }
dev_dax->pgmap.type = MEMORY_DEVICE_GENERIC; for (i = 0; i < dev_dax->nr_range; i++) {
addr = devm_memremap_pages(dev, &dev_dax->pgmap); struct range *range = &dev_dax->ranges[i].range;
if (!devm_request_mem_region(dev, range->start,
range_len(range), dev_name(dev))) {
dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n",
i, range->start, range->end);
return -EBUSY;
}
/* don't update the range for static pgmap */
if (!dev_dax->pgmap)
pgmap->ranges[i] = *range;
}
pgmap->type = MEMORY_DEVICE_GENERIC;
addr = devm_memremap_pages(dev, pgmap);
if (IS_ERR(addr)) if (IS_ERR(addr))
return PTR_ERR(addr); return PTR_ERR(addr);
@@ -456,17 +452,15 @@ int dev_dax_probe(struct device *dev)
} }
EXPORT_SYMBOL_GPL(dev_dax_probe); EXPORT_SYMBOL_GPL(dev_dax_probe);
static int dev_dax_remove(struct device *dev) static int dev_dax_remove(struct dev_dax *dev_dax)
{ {
/* all probe actions are unwound by devm */ /* all probe actions are unwound by devm */
return 0; return 0;
} }
static struct dax_device_driver device_dax_driver = { static struct dax_device_driver device_dax_driver = {
.drv = { .probe = dev_dax_probe,
.probe = dev_dax_probe, .remove = dev_dax_remove,
.remove = dev_dax_remove,
},
.match_always = 1, .match_always = 1,
}; };


@@ -0,0 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o
obj-$(CONFIG_DEV_DAX_HMEM_DEVICES) += device_hmem.o
device_hmem-y := device.o
dax_hmem-y := hmem.o

drivers/dax/hmem/device.c (new file)

@@ -0,0 +1,100 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/platform_device.h>
#include <linux/memregion.h>
#include <linux/module.h>
#include <linux/dax.h>
#include <linux/mm.h>
static bool nohmem;
module_param_named(disable, nohmem, bool, 0444);
void hmem_register_device(int target_nid, struct resource *r)
{
/* define a clean / non-busy resource for the platform device */
struct resource res = {
.start = r->start,
.end = r->end,
.flags = IORESOURCE_MEM,
};
struct platform_device *pdev;
struct memregion_info info;
int rc, id;
if (nohmem)
return;
rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM,
IORES_DESC_SOFT_RESERVED);
if (rc != REGION_INTERSECTS)
return;
id = memregion_alloc(GFP_KERNEL);
if (id < 0) {
pr_err("memregion allocation failure for %pr\n", &res);
return;
}
pdev = platform_device_alloc("hmem", id);
if (!pdev) {
pr_err("hmem device allocation failure for %pr\n", &res);
goto out_pdev;
}
pdev->dev.numa_node = numa_map_to_online_node(target_nid);
info = (struct memregion_info) {
.target_node = target_nid,
};
rc = platform_device_add_data(pdev, &info, sizeof(info));
if (rc < 0) {
pr_err("hmem memregion_info allocation failure for %pr\n", &res);
goto out_pdev;
}
rc = platform_device_add_resources(pdev, &res, 1);
if (rc < 0) {
pr_err("hmem resource allocation failure for %pr\n", &res);
goto out_resource;
}
rc = platform_device_add(pdev);
if (rc < 0) {
dev_err(&pdev->dev, "device add failed for %pr\n", &res);
goto out_resource;
}
return;
out_resource:
put_device(&pdev->dev);
out_pdev:
memregion_free(id);
}
static __init int hmem_register_one(struct resource *res, void *data)
{
/*
* If the resource is not a top-level resource it was already
* assigned to a device by the HMAT parsing.
*/
if (res->parent != &iomem_resource) {
pr_info("HMEM: skip %pr, already claimed\n", res);
return 0;
}
hmem_register_device(phys_to_target_node(res->start), res);
return 0;
}
static __init int hmem_init(void)
{
walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
return 0;
}
/*
* As this is a fallback for address ranges unclaimed by the ACPI HMAT
* parsing it must be at an initcall level greater than hmat_init().
*/
late_initcall(hmem_init);


@@ -3,30 +3,39 @@
#include <linux/memregion.h> #include <linux/memregion.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/pfn_t.h> #include <linux/pfn_t.h>
#include "bus.h" #include "../bus.h"
static bool region_idle;
module_param_named(region_idle, region_idle, bool, 0644);
static int dax_hmem_probe(struct platform_device *pdev) static int dax_hmem_probe(struct platform_device *pdev)
{ {
struct device *dev = &pdev->dev; struct device *dev = &pdev->dev;
struct dev_pagemap pgmap = { };
struct dax_region *dax_region; struct dax_region *dax_region;
struct memregion_info *mri; struct memregion_info *mri;
struct dev_dax_data data;
struct dev_dax *dev_dax; struct dev_dax *dev_dax;
struct resource *res; struct resource *res;
struct range range;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0); res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) if (!res)
return -ENOMEM; return -ENOMEM;
mri = dev->platform_data; mri = dev->platform_data;
memcpy(&pgmap.res, res, sizeof(*res)); range.start = res->start;
range.end = res->end;
dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node, dax_region = alloc_dax_region(dev, pdev->id, &range, mri->target_node,
PMD_SIZE, PFN_DEV|PFN_MAP); PMD_SIZE, 0);
if (!dax_region) if (!dax_region)
return -ENOMEM; return -ENOMEM;
dev_dax = devm_create_dev_dax(dax_region, 0, &pgmap); data = (struct dev_dax_data) {
.dax_region = dax_region,
.id = -1,
.size = region_idle ? 0 : resource_size(res),
};
dev_dax = devm_create_dev_dax(&data);
if (IS_ERR(dev_dax)) if (IS_ERR(dev_dax))
return PTR_ERR(dev_dax); return PTR_ERR(dev_dax);


@@ -19,17 +19,28 @@ static const char *kmem_name;
/* Set if any memory will remain added when the driver will be unloaded. */ /* Set if any memory will remain added when the driver will be unloaded. */
static bool any_hotremove_failed; static bool any_hotremove_failed;
int dev_dax_kmem_probe(struct device *dev) static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
{ {
struct dev_dax *dev_dax = to_dev_dax(dev); struct dev_dax_range *dax_range = &dev_dax->ranges[i];
struct resource *res = &dev_dax->region->res; struct range *range = &dax_range->range;
resource_size_t kmem_start;
resource_size_t kmem_size; /* memory-block align the hotplug range */
resource_size_t kmem_end; r->start = ALIGN(range->start, memory_block_size_bytes());
struct resource *new_res; r->end = ALIGN_DOWN(range->end + 1, memory_block_size_bytes()) - 1;
const char *new_res_name; if (r->start >= r->end) {
r->start = range->start;
r->end = range->end;
return -ENOSPC;
}
return 0;
}
static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
{
struct device *dev = &dev_dax->dev;
int i, mapped = 0;
char *res_name;
int numa_node; int numa_node;
int rc;
/* /*
* Ensure good NUMA information for the persistent memory. * Ensure good NUMA information for the persistent memory.
@@ -39,68 +50,80 @@ int dev_dax_kmem_probe(struct device *dev)
*/ */
numa_node = dev_dax->target_node; numa_node = dev_dax->target_node;
if (numa_node < 0) { if (numa_node < 0) {
dev_warn(dev, "rejecting DAX region %pR with invalid node: %d\n", dev_warn(dev, "rejecting DAX region with invalid node: %d\n",
res, numa_node); numa_node);
return -EINVAL; return -EINVAL;
} }
/* Hotplug starting at the beginning of the next block: */ res_name = kstrdup(dev_name(dev), GFP_KERNEL);
kmem_start = ALIGN(res->start, memory_block_size_bytes()); if (!res_name)
kmem_size = resource_size(res);
/* Adjust the size down to compensate for moving up kmem_start: */
kmem_size -= kmem_start - res->start;
/* Align the size down to cover only complete blocks: */
kmem_size &= ~(memory_block_size_bytes() - 1);
kmem_end = kmem_start + kmem_size;
new_res_name = kstrdup(dev_name(dev), GFP_KERNEL);
if (!new_res_name)
return -ENOMEM; return -ENOMEM;
/* Region is permanently reserved if hotremove fails. */ for (i = 0; i < dev_dax->nr_range; i++) {
new_res = request_mem_region(kmem_start, kmem_size, new_res_name); struct resource *res;
if (!new_res) { struct range range;
dev_warn(dev, "could not reserve region [%pa-%pa]\n", int rc;
&kmem_start, &kmem_end);
kfree(new_res_name); rc = dax_kmem_range(dev_dax, i, &range);
return -EBUSY; if (rc) {
dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
i, range.start, range.end);
continue;
}
/* Region is permanently reserved if hotremove fails. */
res = request_mem_region(range.start, range_len(&range), res_name);
if (!res) {
dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n",
i, range.start, range.end);
/*
* Once some memory has been onlined we can't
* assume that it can be un-onlined safely.
*/
if (mapped)
continue;
kfree(res_name);
return -EBUSY;
}
/*
* Set flags appropriate for System RAM. Leave ..._BUSY clear
* so that add_memory() can add a child resource. Do not
* inherit flags from the parent since it may set new flags
* unknown to us that will break add_memory() below.
*/
res->flags = IORESOURCE_SYSTEM_RAM;
/*
* Ensure that future kexec'd kernels will not treat
* this as RAM automatically.
*/
rc = add_memory_driver_managed(numa_node, range.start,
range_len(&range), kmem_name);
if (rc) {
dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
i, range.start, range.end);
release_mem_region(range.start, range_len(&range));
if (mapped)
continue;
kfree(res_name);
return rc;
}
mapped++;
} }
/* dev_set_drvdata(dev, res_name);
* Set flags appropriate for System RAM. Leave ..._BUSY clear
* so that add_memory() can add a child resource. Do not
* inherit flags from the parent since it may set new flags
* unknown to us that will break add_memory() below.
*/
new_res->flags = IORESOURCE_SYSTEM_RAM;
/*
* Ensure that future kexec'd kernels will not treat this as RAM
* automatically.
*/
rc = add_memory_driver_managed(numa_node, new_res->start,
resource_size(new_res), kmem_name);
if (rc) {
release_resource(new_res);
kfree(new_res);
kfree(new_res_name);
return rc;
}
dev_dax->dax_kmem_res = new_res;
return 0; return 0;
} }
#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_HOTREMOVE
static int dev_dax_kmem_remove(struct device *dev) static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
{ {
struct dev_dax *dev_dax = to_dev_dax(dev); int i, success = 0;
struct resource *res = dev_dax->dax_kmem_res; struct device *dev = &dev_dax->dev;
resource_size_t kmem_start = res->start; const char *res_name = dev_get_drvdata(dev);
resource_size_t kmem_size = resource_size(res);
const char *res_name = res->name;
int rc;
/* /*
* We have one shot for removing memory, if some memory blocks were not * We have one shot for removing memory, if some memory blocks were not
@@ -108,25 +131,36 @@ static int dev_dax_kmem_remove(struct device *dev)
* there is no way to hotremove this memory until reboot because device * there is no way to hotremove this memory until reboot because device
* unbind will succeed even if we return failure. * unbind will succeed even if we return failure.
*/ */
rc = remove_memory(dev_dax->target_node, kmem_start, kmem_size); for (i = 0; i < dev_dax->nr_range; i++) {
if (rc) { struct range range;
int rc;
rc = dax_kmem_range(dev_dax, i, &range);
if (rc)
continue;
rc = remove_memory(dev_dax->target_node, range.start,
range_len(&range));
if (rc == 0) {
release_mem_region(range.start, range_len(&range));
success++;
continue;
}
any_hotremove_failed = true; any_hotremove_failed = true;
dev_err(dev, dev_err(dev,
"DAX region %pR cannot be hotremoved until the next reboot\n", "mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n",
res); i, range.start, range.end);
return rc;
} }
/* Release and free dax resources */ if (success >= dev_dax->nr_range) {
release_resource(res); kfree(res_name);
kfree(res); dev_set_drvdata(dev, NULL);
kfree(res_name); }
dev_dax->dax_kmem_res = NULL;
return 0; return 0;
} }
#else #else
static int dev_dax_kmem_remove(struct device *dev) static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
{ {
/* /*
* Without hotremove purposely leak the request_mem_region() for the * Without hotremove purposely leak the request_mem_region() for the
@@ -141,10 +175,8 @@ static int dev_dax_kmem_remove(struct device *dev)
#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTREMOVE */
static struct dax_device_driver device_dax_kmem_driver = { static struct dax_device_driver device_dax_kmem_driver = {
.drv = { .probe = dev_dax_kmem_probe,
.probe = dev_dax_kmem_probe, .remove = dev_dax_kmem_remove,
.remove = dev_dax_kmem_remove,
},
}; };
static int __init dax_kmem_init(void) static int __init dax_kmem_init(void)


@@ -22,7 +22,7 @@ static int dax_pmem_compat_probe(struct device *dev)
return -ENOMEM; return -ENOMEM;
device_lock(&dev_dax->dev); device_lock(&dev_dax->dev);
rc = dev_dax_probe(&dev_dax->dev); rc = dev_dax_probe(dev_dax);
device_unlock(&dev_dax->dev); device_unlock(&dev_dax->dev);
devres_close_group(&dev_dax->dev, dev_dax); devres_close_group(&dev_dax->dev, dev_dax);


@@ -9,11 +9,12 @@
struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
{ {
struct resource res; struct range range;
int rc, id, region_id; int rc, id, region_id;
resource_size_t offset; resource_size_t offset;
struct nd_pfn_sb *pfn_sb; struct nd_pfn_sb *pfn_sb;
struct dev_dax *dev_dax; struct dev_dax *dev_dax;
struct dev_dax_data data;
struct nd_namespace_io *nsio; struct nd_namespace_io *nsio;
struct dax_region *dax_region; struct dax_region *dax_region;
struct dev_pagemap pgmap = { }; struct dev_pagemap pgmap = { };
@@ -49,16 +50,23 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
if (rc != 2) if (rc != 2)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
/* adjust the dax_region resource to the start of data */ /* adjust the dax_region range to the start of data */
memcpy(&res, &pgmap.res, sizeof(res)); range = pgmap.range;
res.start += offset; range.start += offset,
dax_region = alloc_dax_region(dev, region_id, &res, dax_region = alloc_dax_region(dev, region_id, &range,
nd_region->target_node, le32_to_cpu(pfn_sb->align), nd_region->target_node, le32_to_cpu(pfn_sb->align),
PFN_DEV|PFN_MAP); IORESOURCE_DAX_STATIC);
if (!dax_region) if (!dax_region)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
dev_dax = __devm_create_dev_dax(dax_region, id, &pgmap, subsys); data = (struct dev_dax_data) {
.dax_region = dax_region,
.id = id,
.pgmap = &pgmap,
.subsys = subsys,
.size = range_len(&range),
};
dev_dax = devm_create_dev_dax(&data);
/* child dev_dax instances now own the lifetime of the dax_region */ /* child dev_dax instances now own the lifetime of the dax_region */
dax_region_put(dax_region); dax_region_put(dax_region);


@@ -38,7 +38,7 @@ void __init efi_fake_memmap_early(void)
m_start = mem->range.start; m_start = mem->range.start;
m_end = mem->range.end; m_end = mem->range.end;
for_each_efi_memory_desc(md) { for_each_efi_memory_desc(md) {
u64 start, end; u64 start, end, size;
if (md->type != EFI_CONVENTIONAL_MEMORY) if (md->type != EFI_CONVENTIONAL_MEMORY)
continue; continue;
@@ -58,11 +58,17 @@ void __init efi_fake_memmap_early(void)
*/ */
start = max(start, m_start); start = max(start, m_start);
end = min(end, m_end); end = min(end, m_end);
size = end - start + 1;
if (end <= start) if (end <= start)
continue; continue;
e820__range_update(start, end - start + 1, E820_TYPE_RAM,
E820_TYPE_SOFT_RESERVED); /*
* Ensure each efi_fake_mem instance results in
* a unique e820 resource
*/
e820__range_remove(start, size, E820_TYPE_RAM, 1);
e820__range_add(start, size, E820_TYPE_SOFT_RESERVED);
e820__update_table(e820_table); e820__update_table(e820_table);
} }
} }


@@ -258,8 +258,8 @@ shmem_writeback(struct drm_i915_gem_object *obj)
for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) { for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
struct page *page; struct page *page;
page = find_lock_entry(mapping, i); page = find_lock_page(mapping, i);
if (!page || xa_is_value(page)) if (!page)
continue; continue;
if (!page_mapped(page) && clear_page_dirty_for_io(page)) { if (!page_mapped(page) && clear_page_dirty_for_io(page)) {


@@ -101,7 +101,7 @@ unsigned long nouveau_dmem_page_addr(struct page *page)
{ {
struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) - unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) -
chunk->pagemap.res.start; chunk->pagemap.range.start;
return chunk->bo->offset + off; return chunk->bo->offset + off;
} }
@@ -249,7 +249,9 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
chunk->drm = drm; chunk->drm = drm;
chunk->pagemap.type = MEMORY_DEVICE_PRIVATE; chunk->pagemap.type = MEMORY_DEVICE_PRIVATE;
chunk->pagemap.res = *res; chunk->pagemap.range.start = res->start;
chunk->pagemap.range.end = res->end;
chunk->pagemap.nr_range = 1;
chunk->pagemap.ops = &nouveau_dmem_pagemap_ops; chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
chunk->pagemap.owner = drm->dev; chunk->pagemap.owner = drm->dev;
@@ -273,7 +275,7 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
list_add(&chunk->list, &drm->dmem->chunks); list_add(&chunk->list, &drm->dmem->chunks);
mutex_unlock(&drm->dmem->mutex); mutex_unlock(&drm->dmem->mutex);
pfn_first = chunk->pagemap.res.start >> PAGE_SHIFT; pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT;
page = pfn_to_page(pfn_first); page = pfn_to_page(pfn_first);
spin_lock(&drm->dmem->lock); spin_lock(&drm->dmem->lock);
for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) { for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) {
@@ -294,8 +296,7 @@ out_bo_unpin:
out_bo_free: out_bo_free:
nouveau_bo_ref(NULL, &chunk->bo); nouveau_bo_ref(NULL, &chunk->bo);
out_release: out_release:
release_mem_region(chunk->pagemap.res.start, release_mem_region(chunk->pagemap.range.start, range_len(&chunk->pagemap.range));
resource_size(&chunk->pagemap.res));
out_free: out_free:
kfree(chunk); kfree(chunk);
out: out:
@@ -382,8 +383,8 @@ nouveau_dmem_fini(struct nouveau_drm *drm)
nouveau_bo_ref(NULL, &chunk->bo); nouveau_bo_ref(NULL, &chunk->bo);
list_del(&chunk->list); list_del(&chunk->list);
memunmap_pages(&chunk->pagemap); memunmap_pages(&chunk->pagemap);
release_mem_region(chunk->pagemap.res.start, release_mem_region(chunk->pagemap.range.start,
resource_size(&chunk->pagemap.res)); range_len(&chunk->pagemap.range));
kfree(chunk); kfree(chunk);
} }


@@ -2198,7 +2198,7 @@ static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size)
addr_end = addr + size - 1; addr_end = addr + size - 1;
for_each_reserved_mem_region(i, &start, &end) { for_each_reserved_mem_range(i, &start, &end) {
if (addr >= start && addr_end <= end) if (addr >= start && addr_end <= end)
return true; return true;
} }


@@ -211,7 +211,7 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
} }
static void badblocks_populate(struct badrange *badrange, static void badblocks_populate(struct badrange *badrange,
struct badblocks *bb, const struct resource *res) struct badblocks *bb, const struct range *range)
{ {
struct badrange_entry *bre; struct badrange_entry *bre;
@@ -222,34 +222,34 @@ static void badblocks_populate(struct badrange *badrange,
u64 bre_end = bre->start + bre->length - 1; u64 bre_end = bre->start + bre->length - 1;
/* Discard intervals with no intersection */ /* Discard intervals with no intersection */
if (bre_end < res->start) if (bre_end < range->start)
continue; continue;
if (bre->start > res->end) if (bre->start > range->end)
continue; continue;
/* Deal with any overlap after start of the namespace */ /* Deal with any overlap after start of the namespace */
if (bre->start >= res->start) { if (bre->start >= range->start) {
u64 start = bre->start; u64 start = bre->start;
u64 len; u64 len;
if (bre_end <= res->end) if (bre_end <= range->end)
len = bre->length; len = bre->length;
else else
len = res->start + resource_size(res) len = range->start + range_len(range)
- bre->start; - bre->start;
__add_badblock_range(bb, start - res->start, len); __add_badblock_range(bb, start - range->start, len);
continue; continue;
} }
/* /*
* Deal with overlap for badrange starting before * Deal with overlap for badrange starting before
* the namespace. * the namespace.
*/ */
if (bre->start < res->start) { if (bre->start < range->start) {
u64 len; u64 len;
if (bre_end < res->end) if (bre_end < range->end)
len = bre->start + bre->length - res->start; len = bre->start + bre->length - range->start;
else else
len = resource_size(res); len = range_len(range);
__add_badblock_range(bb, 0, len); __add_badblock_range(bb, 0, len);
} }
} }
@@ -267,7 +267,7 @@ static void badblocks_populate(struct badrange *badrange,
* and add badblocks entries for all matching sub-ranges * and add badblocks entries for all matching sub-ranges
*/ */
void nvdimm_badblocks_populate(struct nd_region *nd_region, void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res) struct badblocks *bb, const struct range *range)
{ {
struct nvdimm_bus *nvdimm_bus; struct nvdimm_bus *nvdimm_bus;
@@ -279,7 +279,7 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
nvdimm_bus_lock(&nvdimm_bus->dev); nvdimm_bus_lock(&nvdimm_bus->dev);
badblocks_populate(&nvdimm_bus->badrange, bb, res); badblocks_populate(&nvdimm_bus->badrange, bb, range);
nvdimm_bus_unlock(&nvdimm_bus->dev); nvdimm_bus_unlock(&nvdimm_bus->dev);
} }
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate); EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);

Some files were not shown because too many files have changed in this diff.