Merge branch 'for-linus' of git://git.armlinux.org.uk/~rmk/linux-arm

Pull ARM updates from Russell King:

 - Correct ARMs dma-mapping to use the correct printk format strings.

 - Avoid defining OBJCOPYFLAGS globally which upsets lkdtm rodata
   testing.

 - Cleanups to ARMs asm/memory.h include.

 - L2 cache cleanups.

 - Allow flat nommu binaries to be executed on ARM MMU systems.

 - Kernel hardening - add more read-only after init annotations,
   including making some kernel vdso variables const.

 - Ensure AMBA primecell clocks are appropriately defaulted.

 - ARM breakpoint cleanup.

 - Various StrongARM 11x0 and companion chip (SA1111) updates to bring
   this legacy platform to use more modern APIs for (eg) GPIOs and
   interrupts, which will allow us in the future to reduce some of the
   board-level driver clutter and elimate function callbacks into board
   code via platform data. There still appears to be interest in these
   platforms!

 - Remove the now redundant secure_flush_area() API.

 - Module PLT relocation optimisations. Ard says: This series of 4
   patches optimizes the ARM PLT generation code that is invoked at
   module load time, to get rid of the O(n^2) algorithm that results in
   pathological load times of 10 seconds or more for large modules on
   certain STB platforms.

 - ARMv7M cache maintanence support.

 - L2 cache PMU support

* 'for-linus' of git://git.armlinux.org.uk/~rmk/linux-arm: (35 commits)
  ARM: sa1111: provide to_sa1111_device() macro
  ARM: sa1111: add sa1111_get_irq()
  ARM: sa1111: clean up duplication in IRQ chip implementation
  ARM: sa1111: implement a gpio_chip for SA1111 GPIOs
  ARM: sa1111: move irq cleanup to separate function
  ARM: sa1111: use devm_clk_get()
  ARM: sa1111: use devm_kzalloc()
  ARM: sa1111: ensure we only touch RAB bus type devices when removing
  ARM: 8611/1: l2x0: add PMU support
  ARM: 8610/1: V7M: Add dsb before jumping in handler mode
  ARM: 8609/1: V7M: Add support for the Cortex-M7 processor
  ARM: 8608/1: V7M: Indirect proc_info construction for V7M CPUs
  ARM: 8607/1: V7M: Wire up caches for V7M processors with cache support.
  ARM: 8606/1: V7M: introduce cache operations
  ARM: 8605/1: V7M: fix notrace variant of save_and_disable_irqs
  ARM: 8604/1: V7M: Add support for reading the CTR with read_cpuid_cachetype()
  ARM: 8603/1: V7M: Add addresses for mem-mapped V7M cache operations
  ARM: 8602/1: factor out CSSELR/CCSIDR operations that use cp15 directly
  ARM: kernel: avoid brute force search on PLT generation
  ARM: kernel: sort relocation sections before allocating PLTs
  ...
This commit is contained in:
Linus Torvalds
2016-10-06 07:59:37 -07:00
39 changed files with 1757 additions and 399 deletions

View File

@@ -19,7 +19,7 @@ extern struct of_cpuidle_method __cpuidle_method_of_table[];
static const struct of_cpuidle_method __cpuidle_method_of_table_sentinel
__used __section(__cpuidle_method_of_table_end);
static struct cpuidle_ops cpuidle_ops[NR_CPUS];
static struct cpuidle_ops cpuidle_ops[NR_CPUS] __ro_after_init;
/**
* arm_cpuidle_simple_enter() - a wrapper to cpu_do_idle()

View File

@@ -158,7 +158,21 @@ __after_proc_init:
bic r0, r0, #CR_V
#endif
mcr p15, 0, r0, c1, c0, 0 @ write control reg
#endif /* CONFIG_CPU_CP15 */
#elif defined (CONFIG_CPU_V7M)
/* For V7M systems we want to modify the CCR similarly to the SCTLR */
#ifdef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #V7M_SCB_CCR_DC
#endif
#ifdef CONFIG_CPU_BPREDICT_DISABLE
bic r0, r0, #V7M_SCB_CCR_BP
#endif
#ifdef CONFIG_CPU_ICACHE_DISABLE
bic r0, r0, #V7M_SCB_CCR_IC
#endif
movw r3, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
movt r3, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
str r0, [r3]
#endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */
ret lr
ENDPROC(__after_proc_init)
.ltorg

View File

@@ -9,6 +9,7 @@
#include <linux/elf.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sort.h>
#include <asm/cache.h>
#include <asm/opcodes.h>
@@ -30,154 +31,198 @@ struct plt_entries {
u32 lit[PLT_ENT_COUNT];
};
static bool in_init(const struct module *mod, u32 addr)
{
return addr - (u32)mod->init_layout.base < mod->init_layout.size;
}
u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
{
struct plt_entries *plt, *plt_end;
int c, *count;
struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr;
int idx = 0;
if (in_init(mod, loc)) {
plt = (void *)mod->arch.init_plt->sh_addr;
plt_end = (void *)plt + mod->arch.init_plt->sh_size;
count = &mod->arch.init_plt_count;
} else {
plt = (void *)mod->arch.core_plt->sh_addr;
plt_end = (void *)plt + mod->arch.core_plt->sh_size;
count = &mod->arch.core_plt_count;
/*
* Look for an existing entry pointing to 'val'. Given that the
* relocations are sorted, this will be the last entry we allocated.
* (if one exists).
*/
if (mod->arch.plt_count > 0) {
plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT;
idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT;
if (plt->lit[idx] == val)
return (u32)&plt->ldr[idx];
idx = (idx + 1) % PLT_ENT_COUNT;
if (!idx)
plt++;
}
/* Look for an existing entry pointing to 'val' */
for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) {
int i;
mod->arch.plt_count++;
BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size);
if (!c) {
/* Populate a new set of entries */
*plt = (struct plt_entries){
{ [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
{ val, }
};
++*count;
return (u32)plt->ldr;
}
for (i = 0; i < PLT_ENT_COUNT; i++) {
if (!plt->lit[i]) {
plt->lit[i] = val;
++*count;
}
if (plt->lit[i] == val)
return (u32)&plt->ldr[i];
}
if (!idx)
/* Populate a new set of entries */
*plt = (struct plt_entries){
{ [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
{ val, }
};
else
plt->lit[idx] = val;
return (u32)&plt->ldr[idx];
}
#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b))
static int cmp_rel(const void *a, const void *b)
{
const Elf32_Rel *x = a, *y = b;
int i;
/* sort by type and symbol index */
i = cmp_3way(ELF32_R_TYPE(x->r_info), ELF32_R_TYPE(y->r_info));
if (i == 0)
i = cmp_3way(ELF32_R_SYM(x->r_info), ELF32_R_SYM(y->r_info));
return i;
}
static bool is_zero_addend_relocation(Elf32_Addr base, const Elf32_Rel *rel)
{
u32 *tval = (u32 *)(base + rel->r_offset);
/*
* Do a bitwise compare on the raw addend rather than fully decoding
* the offset and doing an arithmetic comparison.
* Note that a zero-addend jump/call relocation is encoded taking the
* PC bias into account, i.e., -8 for ARM and -4 for Thumb2.
*/
switch (ELF32_R_TYPE(rel->r_info)) {
u16 upper, lower;
case R_ARM_THM_CALL:
case R_ARM_THM_JUMP24:
upper = __mem_to_opcode_thumb16(((u16 *)tval)[0]);
lower = __mem_to_opcode_thumb16(((u16 *)tval)[1]);
return (upper & 0x7ff) == 0x7ff && (lower & 0x2fff) == 0x2ffe;
case R_ARM_CALL:
case R_ARM_PC24:
case R_ARM_JUMP24:
return (__mem_to_opcode_arm(*tval) & 0xffffff) == 0xfffffe;
}
BUG();
}
static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num,
u32 mask)
static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num)
{
u32 *loc1, *loc2;
int i;
const Elf32_Rel *prev;
for (i = 0; i < num; i++) {
if (rel[i].r_info != rel[num].r_info)
continue;
/*
* Entries are sorted by type and symbol index. That means that,
* if a duplicate entry exists, it must be in the preceding
* slot.
*/
if (!num)
return false;
/*
* Identical relocation types against identical symbols can
* still result in different PLT entries if the addend in the
* place is different. So resolve the target of the relocation
* to compare the values.
*/
loc1 = (u32 *)(base + rel[i].r_offset);
loc2 = (u32 *)(base + rel[num].r_offset);
if (((*loc1 ^ *loc2) & mask) == 0)
return 1;
}
return 0;
prev = rel + num - 1;
return cmp_rel(rel + num, prev) == 0 &&
is_zero_addend_relocation(base, prev);
}
/* Count how many PLT entries we may need */
static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num)
static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
const Elf32_Rel *rel, int num)
{
unsigned int ret = 0;
const Elf32_Sym *s;
int i;
/*
* Sure, this is order(n^2), but it's usually short, and not
* time critical
*/
for (i = 0; i < num; i++)
for (i = 0; i < num; i++) {
switch (ELF32_R_TYPE(rel[i].r_info)) {
case R_ARM_CALL:
case R_ARM_PC24:
case R_ARM_JUMP24:
if (!duplicate_rel(base, rel, i,
__opcode_to_mem_arm(0x00ffffff)))
ret++;
break;
#ifdef CONFIG_THUMB2_KERNEL
case R_ARM_THM_CALL:
case R_ARM_THM_JUMP24:
if (!duplicate_rel(base, rel, i,
__opcode_to_mem_thumb32(0x07ff2fff)))
/*
* We only have to consider branch targets that resolve
* to undefined symbols. This is not simply a heuristic,
* it is a fundamental limitation, since the PLT itself
* is part of the module, and needs to be within range
* as well, so modules can never grow beyond that limit.
*/
s = syms + ELF32_R_SYM(rel[i].r_info);
if (s->st_shndx != SHN_UNDEF)
break;
/*
* Jump relocations with non-zero addends against
* undefined symbols are supported by the ELF spec, but
* do not occur in practice (e.g., 'jump n bytes past
* the entry point of undefined function symbol f').
* So we need to support them, but there is no need to
* take them into consideration when trying to optimize
* this code. So let's only check for duplicates when
* the addend is zero.
*/
if (!is_zero_addend_relocation(base, rel + i) ||
!duplicate_rel(base, rel, i))
ret++;
#endif
}
}
return ret;
}
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod)
{
unsigned long core_plts = 0, init_plts = 0;
unsigned long plts = 0;
Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
Elf32_Sym *syms = NULL;
/*
* To store the PLTs, we expand the .text section for core module code
* and the .init.text section for initialization code.
* and for initialization code.
*/
for (s = sechdrs; s < sechdrs_end; ++s)
if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
mod->arch.core_plt = s;
else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
mod->arch.init_plt = s;
for (s = sechdrs; s < sechdrs_end; ++s) {
if (strcmp(".plt", secstrings + s->sh_name) == 0)
mod->arch.plt = s;
else if (s->sh_type == SHT_SYMTAB)
syms = (Elf32_Sym *)s->sh_addr;
}
if (!mod->arch.core_plt || !mod->arch.init_plt) {
pr_err("%s: sections missing\n", mod->name);
if (!mod->arch.plt) {
pr_err("%s: module PLT section missing\n", mod->name);
return -ENOEXEC;
}
if (!syms) {
pr_err("%s: module symtab section missing\n", mod->name);
return -ENOEXEC;
}
for (s = sechdrs + 1; s < sechdrs_end; ++s) {
const Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
int numrels = s->sh_size / sizeof(Elf32_Rel);
Elf32_Shdr *dstsec = sechdrs + s->sh_info;
if (s->sh_type != SHT_REL)
continue;
if (strstr(secstrings + s->sh_name, ".init"))
init_plts += count_plts(dstsec->sh_addr, rels, numrels);
else
core_plts += count_plts(dstsec->sh_addr, rels, numrels);
/* ignore relocations that operate on non-exec sections */
if (!(dstsec->sh_flags & SHF_EXECINSTR))
continue;
/* sort by type and symbol index */
sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL);
plts += count_plts(syms, dstsec->sh_addr, rels, numrels);
}
mod->arch.core_plt->sh_type = SHT_NOBITS;
mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES;
mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
sizeof(struct plt_entries));
mod->arch.core_plt_count = 0;
mod->arch.plt->sh_type = SHT_NOBITS;
mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE,
sizeof(struct plt_entries));
mod->arch.plt_count = 0;
mod->arch.init_plt->sh_type = SHT_NOBITS;
mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES;
mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
sizeof(struct plt_entries));
mod->arch.init_plt_count = 0;
pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__,
mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size);
pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size);
return 0;
}

View File

@@ -1,4 +1,3 @@
SECTIONS {
.core.plt : { BYTE(0) }
.init.plt : { BYTE(0) }
.plt : { BYTE(0) }
}

View File

@@ -114,19 +114,19 @@ EXPORT_SYMBOL(elf_hwcap2);
#ifdef MULTI_CPU
struct processor processor __read_mostly;
struct processor processor __ro_after_init;
#endif
#ifdef MULTI_TLB
struct cpu_tlb_fns cpu_tlb __read_mostly;
struct cpu_tlb_fns cpu_tlb __ro_after_init;
#endif
#ifdef MULTI_USER
struct cpu_user_fns cpu_user __read_mostly;
struct cpu_user_fns cpu_user __ro_after_init;
#endif
#ifdef MULTI_CACHE
struct cpu_cache_fns cpu_cache __read_mostly;
struct cpu_cache_fns cpu_cache __ro_after_init;
#endif
#ifdef CONFIG_OUTER_CACHE
struct outer_cache_fns outer_cache __read_mostly;
struct outer_cache_fns outer_cache __ro_after_init;
EXPORT_SYMBOL(outer_cache);
#endif
@@ -290,12 +290,9 @@ static int cpu_has_aliasing_icache(unsigned int arch)
/* arch specifies the register format */
switch (arch) {
case CPU_ARCH_ARMv7:
asm("mcr p15, 2, %0, c0, c0, 0 @ set CSSELR"
: /* No output operands */
: "r" (1));
set_csselr(CSSELR_ICACHE | CSSELR_L1);
isb();
asm("mrc p15, 1, %0, c0, c0, 0 @ read CCSIDR"
: "=r" (id_reg));
id_reg = read_ccsidr();
line_size = 4 << ((id_reg & 0x7) + 2);
num_sets = ((id_reg >> 13) & 0x7fff) + 1;
aliasing_icache = (line_size * num_sets) > PAGE_SIZE;
@@ -315,11 +312,12 @@ static void __init cacheid_init(void)
{
unsigned int arch = cpu_architecture();
if (arch == CPU_ARCH_ARMv7M) {
cacheid = 0;
} else if (arch >= CPU_ARCH_ARMv6) {
if (arch >= CPU_ARCH_ARMv6) {
unsigned int cachetype = read_cpuid_cachetype();
if ((cachetype & (7 << 29)) == 4 << 29) {
if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
cacheid = 0;
} else if ((cachetype & (7 << 29)) == 4 << 29) {
/* ARMv7 register format */
arch = CPU_ARCH_ARMv7;
cacheid = CACHEID_VIPT_NONALIASING;

View File

@@ -82,7 +82,7 @@ enum ipi_msg_type {
static DECLARE_COMPLETION(cpu_running);
static struct smp_operations smp_ops;
static struct smp_operations smp_ops __ro_after_init;
void __init smp_set_ops(const struct smp_operations *ops)
{

View File

@@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/cache.h>
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/kernel.h>
@@ -39,7 +40,7 @@
static struct page **vdso_text_pagelist;
/* Total number of pages needed for the data and text portions of the VDSO. */
unsigned int vdso_total_pages __read_mostly;
unsigned int vdso_total_pages __ro_after_init;
/*
* The VDSO data page.
@@ -47,13 +48,13 @@ unsigned int vdso_total_pages __read_mostly;
static union vdso_data_store vdso_data_store __page_aligned_data;
static struct vdso_data *vdso_data = &vdso_data_store.data;
static struct page *vdso_data_page;
static struct vm_special_mapping vdso_data_mapping = {
static struct page *vdso_data_page __ro_after_init;
static const struct vm_special_mapping vdso_data_mapping = {
.name = "[vvar]",
.pages = &vdso_data_page,
};
static struct vm_special_mapping vdso_text_mapping = {
static struct vm_special_mapping vdso_text_mapping __ro_after_init = {
.name = "[vdso]",
};
@@ -67,7 +68,7 @@ struct elfinfo {
/* Cached result of boot-time check for whether the arch timer exists,
* and if so, whether the virtual counter is useable.
*/
static bool cntvct_ok __read_mostly;
static bool cntvct_ok __ro_after_init;
static bool __init cntvct_functional(void)
{