Merge tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux

Pull nds32 updates from Greentime Hu:

 - Perf support

 - Power management support

 - FPU support

 - Hardware prefetcher support

 - Build error fixes

 - Performance enhancements

* tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux:
  nds32: support hardware prefetcher
  nds32: Fix the items of hwcap_str ordering issue.
  math-emu/soft-fp.h: (_FP_ROUND_ZERO) cast 0 to void to fix warning
  math-emu/op-2.h: Use statement expressions to prevent negative constant shift
  nds32: support denormalized result through FP emulator
  nds32: Support FP emulation
  nds32: nds32 FPU port
  nds32: Remove duplicated include from pm.c
  nds32: Power management for nds32
  nds32: Add document for NDS32 PMU.
  nds32: Add perf call-graph support.
  nds32: Perf porting
  nds32: Fix bug in bitfield.h
  nds32: Fix gcc 8.0 compiler option incompatible.
  nds32: Fill all TLB entries with kernel image mapping
  nds32: Remove the redundant assignment

Committed by Linus Torvalds on 2018-12-29 09:37:03 -08:00
65 changed files with 4440 additions and 88 deletions


@@ -36,6 +36,7 @@ generic-y += kprobes.h
generic-y += kvm_para.h
generic-y += limits.h
generic-y += local.h
generic-y += local64.h
generic-y += mm-arch-hooks.h
generic-y += mman.h
generic-y += parport.h


@@ -251,6 +251,11 @@
#define ITYPE_mskSTYPE ( 0xF << ITYPE_offSTYPE )
#define ITYPE_mskCPID ( 0x3 << ITYPE_offCPID )
/* Additional definitions of ITYPE register for FPU */
#define FPU_DISABLE_EXCEPTION (0x1 << ITYPE_offSTYPE)
#define FPU_EXCEPTION (0x2 << ITYPE_offSTYPE)
#define FPU_CPID 0 /* FPU Co-Processor ID is 0 */
#define NDS32_VECTOR_mskNONEXCEPTION 0x78
#define NDS32_VECTOR_offEXCEPTION 8
#define NDS32_VECTOR_offINTERRUPT 9
@@ -692,8 +697,8 @@
#define PFM_CTL_offKU1 13 /* Enable user mode event counting for PFMC1 */
#define PFM_CTL_offKU2 14 /* Enable user mode event counting for PFMC2 */
#define PFM_CTL_offSEL0 15 /* The event selection for PFMC0 */
-#define PFM_CTL_offSEL1 21 /* The event selection for PFMC1 */
-#define PFM_CTL_offSEL2 27 /* The event selection for PFMC2 */
+#define PFM_CTL_offSEL1 16 /* The event selection for PFMC1 */
+#define PFM_CTL_offSEL2 22 /* The event selection for PFMC2 */
/* bit 28:31 reserved */
#define PFM_CTL_mskEN0 ( 0x01 << PFM_CTL_offEN0 )
@@ -735,14 +740,20 @@
#define N13MISC_CTL_offRTP 1 /* Disable Return Target Predictor */
#define N13MISC_CTL_offPTEPF 2 /* Disable HPTWK L2 PTE prefetch */
#define N13MISC_CTL_offSP_SHADOW_EN 4 /* Enable shadow stack pointers */
#define MISC_CTL_offHWPRE 11 /* Enable HardWare PREFETCH */
/* bit 6, 9:31 reserved */
#define N13MISC_CTL_makBTB ( 0x1 << N13MISC_CTL_offBTB )
#define N13MISC_CTL_makRTP ( 0x1 << N13MISC_CTL_offRTP )
#define N13MISC_CTL_makPTEPF ( 0x1 << N13MISC_CTL_offPTEPF )
#define N13MISC_CTL_makSP_SHADOW_EN ( 0x1 << N13MISC_CTL_offSP_SHADOW_EN )
#define MISC_CTL_makHWPRE_EN ( 0x1 << MISC_CTL_offHWPRE )
#ifdef CONFIG_HW_PRE
#define MISC_init (N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN|MISC_CTL_makHWPRE_EN)
#else
#define MISC_init (N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN)
#endif
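
Illustrative sketch (not part of this diff): MISC_init bundles the branch-predictor, return-target-predictor, shadow-stack-pointer and, under CONFIG_HW_PRE, hardware-prefetch enable bits. Early setup code would write it to the MISC_CTL system register roughly as below; the selector name NDS32_SR_MISC_CTL is an assumption here, and the real port does this in early assembly.

/* Hedged sketch: applying MISC_init at boot. NDS32_SR_MISC_CTL is an
 * assumed register-selector name; the mtsr/isb intrinsics also appear
 * in the FPU code later in this series. */
static inline void setup_misc_ctl(void)
{
	__nds32__mtsr(MISC_init, NDS32_SR_MISC_CTL);
	__nds32__isb();		/* serialize before relying on the new settings */
}
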
/******************************************************************************
* PRUSR_ACC_CTL (Privileged Resource User Access Control Registers)
@@ -926,6 +937,7 @@
#define FPCSR_mskDNIT ( 0x1 << FPCSR_offDNIT )
#define FPCSR_mskRIT ( 0x1 << FPCSR_offRIT )
#define FPCSR_mskALL (FPCSR_mskIVO | FPCSR_mskDBZ | FPCSR_mskOVF | FPCSR_mskUDF | FPCSR_mskIEX)
#define FPCSR_mskALLE_NO_UDFE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskIEXE)
#define FPCSR_mskALLE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskUDFE | FPCSR_mskIEXE)
#define FPCSR_mskALLT (FPCSR_mskIVOT | FPCSR_mskDBZT | FPCSR_mskOVFT | FPCSR_mskUDFT | FPCSR_mskIEXT |FPCSR_mskDNIT | FPCSR_mskRIT)
@@ -946,6 +958,15 @@
#define FPCFG_mskIMVER ( 0x1F << FPCFG_offIMVER )
#define FPCFG_mskAVER ( 0x1F << FPCFG_offAVER )
/* 8 Single precision or 4 double precision registers are available */
#define SP8_DP4_reg 0
/* 16 Single precision or 8 double precision registers are available */
#define SP16_DP8_reg 1
/* 32 Single precision or 16 double precision registers are available */
#define SP32_DP16_reg 2
/* 32 Single precision or 32 double precision registers are available */
#define SP32_DP32_reg 3
/******************************************************************************
* fucpr: FUCOP_CTL (FPU and Coprocessor Enable Control Register)
*****************************************************************************/


@@ -9,6 +9,7 @@
*/
#include <asm/ptrace.h>
#include <asm/fpu.h>
typedef unsigned long elf_greg_t;
typedef unsigned long elf_freg_t[3];
@@ -159,8 +160,18 @@ struct elf32_hdr;
#endif
#if IS_ENABLED(CONFIG_FPU)
#define FPU_AUX_ENT NEW_AUX_ENT(AT_FPUCW, FPCSR_INIT)
#else
#define FPU_AUX_ENT NEW_AUX_ENT(AT_IGNORE, 0)
#endif
#define ARCH_DLINFO \
do { \
/* Optional FPU initialization */ \
FPU_AUX_ENT; \
\
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(elf_addr_t)current->mm->context.vdso); \
} while (0)
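
For illustration (not part of this diff), userspace can pick up the FPU control word the kernel publishes through the aux vector; a minimal sketch using glibc's getauxval():

#include <elf.h>
#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	/* AT_FPUCW carries FPCSR_INIT when the kernel has FPU support;
	 * with the AT_IGNORE fallback, getauxval() simply returns 0. */
	unsigned long fpucw = getauxval(AT_FPUCW);

	printf("initial FPU control word: %#lx\n", fpucw);
	return 0;
}
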


@@ -0,0 +1,126 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2005-2018 Andes Technology Corporation */
#ifndef __ASM_NDS32_FPU_H
#define __ASM_NDS32_FPU_H
#if IS_ENABLED(CONFIG_FPU)
#ifndef __ASSEMBLY__
#include <linux/sched/task_stack.h>
#include <linux/preempt.h>
#include <asm/ptrace.h>
extern bool has_fpu;
extern void save_fpu(struct task_struct *__tsk);
extern void load_fpu(const struct fpu_struct *fpregs);
extern bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs);
extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu);
#define test_tsk_fpu(regs) (regs->fucop_ctl & FUCOP_CTL_mskCP0EN)
/*
 * Initially load the FPU with signalling NaNs. This bit pattern
 * has the property that, whether interpreted as single or double
 * precision, it still represents a signalling NaN.
*/
#define sNAN64 0xFFFFFFFFFFFFFFFFULL
#define sNAN32 0xFFFFFFFFUL
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
/*
 * Denormalized numbers are unsupported by the nds32 FPU, so an
 * operation is treated as an underflow case when its final result
 * is a denormalized number. To enhance precision, the underflow
 * exception trap is enabled by default, and the kernel re-executes
 * the instruction through the FPU emulator on an underflow exception.
*/
#define FPCSR_INIT FPCSR_mskUDFE
#else
#define FPCSR_INIT 0x0UL
#endif
extern const struct fpu_struct init_fpuregs;
static inline void disable_ptreg_fpu(struct pt_regs *regs)
{
regs->fucop_ctl &= ~FUCOP_CTL_mskCP0EN;
}
static inline void enable_ptreg_fpu(struct pt_regs *regs)
{
regs->fucop_ctl |= FUCOP_CTL_mskCP0EN;
}
static inline void enable_fpu(void)
{
unsigned long fucop_ctl;
fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) | FUCOP_CTL_mskCP0EN;
__nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
__nds32__isb();
}
static inline void disable_fpu(void)
{
unsigned long fucop_ctl;
fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) & ~FUCOP_CTL_mskCP0EN;
__nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
__nds32__isb();
}
static inline void lose_fpu(void)
{
preempt_disable();
#if IS_ENABLED(CONFIG_LAZY_FPU)
if (last_task_used_math == current) {
last_task_used_math = NULL;
#else
if (test_tsk_fpu(task_pt_regs(current))) {
#endif
save_fpu(current);
}
disable_ptreg_fpu(task_pt_regs(current));
preempt_enable();
}
static inline void own_fpu(void)
{
preempt_disable();
#if IS_ENABLED(CONFIG_LAZY_FPU)
if (last_task_used_math != current) {
if (last_task_used_math != NULL)
save_fpu(last_task_used_math);
load_fpu(&current->thread.fpu);
last_task_used_math = current;
}
#else
if (!test_tsk_fpu(task_pt_regs(current))) {
load_fpu(&current->thread.fpu);
}
#endif
enable_ptreg_fpu(task_pt_regs(current));
preempt_enable();
}
#if !IS_ENABLED(CONFIG_LAZY_FPU)
static inline void unlazy_fpu(struct task_struct *tsk)
{
preempt_disable();
if (test_tsk_fpu(task_pt_regs(tsk)))
save_fpu(tsk);
preempt_enable();
}
#endif /* !CONFIG_LAZY_FPU */
static inline void clear_fpu(struct pt_regs *regs)
{
preempt_disable();
if (test_tsk_fpu(regs))
disable_ptreg_fpu(regs);
preempt_enable();
}
#endif /* CONFIG_FPU */
#endif /* __ASSEMBLY__ */
#endif /* __ASM_NDS32_FPU_H */
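
A hedged usage sketch (an assumed kernel-side caller, not part of this diff): a coprocessor-disabled exception would typically be handled by claiming the FPU for the current task and returning, so the faulting instruction retries with CP0 enabled:

/* Hypothetical handler body showing the intended own_fpu() protocol.
 * own_fpu() saves the previous owner's registers (under CONFIG_LAZY_FPU),
 * loads current->thread.fpu, and sets CP0EN in the saved fucop_ctl. */
void handle_fpu_disabled_exception(struct pt_regs *regs)
{
	own_fpu();
	/* Return; the faulting FPU instruction now runs on real hardware. */
}
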


@@ -0,0 +1,32 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2005-2018 Andes Technology Corporation */
#ifndef __ARCH_NDS32_FPUEMU_H
#define __ARCH_NDS32_FPUEMU_H
/*
* single precision
*/
void fadds(void *ft, void *fa, void *fb);
void fsubs(void *ft, void *fa, void *fb);
void fmuls(void *ft, void *fa, void *fb);
void fdivs(void *ft, void *fa, void *fb);
void fs2d(void *ft, void *fa);
void fsqrts(void *ft, void *fa);
void fnegs(void *ft, void *fa);
int fcmps(void *ft, void *fa, void *fb, int cop);
/*
* double precision
*/
void faddd(void *ft, void *fa, void *fb);
void fsubd(void *ft, void *fa, void *fb);
void fmuld(void *ft, void *fa, void *fb);
void fdivd(void *ft, void *fa, void *fb);
void fsqrtd(void *ft, void *fa);
void fd2s(void *ft, void *fa);
void fnegd(void *ft, void *fa);
int fcmpd(void *ft, void *fa, void *fb, int cop);
#endif /* __ARCH_NDS32_FPUEMU_H */
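
For illustration (not part of this diff), each helper takes untyped pointers into the saved FPU register file; a self-contained sketch driving the single-precision add directly:

#include <asm/fpuemu.h>

/* Hedged sketch: emulated single-precision add on two host floats. */
int fpuemu_add_demo(void)
{
	float a = 1.5f, b = 2.25f, t;

	fadds(&t, &a, &b);	/* t = a + b via the soft-float core */
	return t == 3.75f;	/* 3.75 is exactly representable */
}
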


@@ -0,0 +1,109 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2005-2018 Andes Technology Corporation */
#ifndef __NDS32_FPU_INST_H
#define __NDS32_FPU_INST_H
#define cop0_op 0x35
/*
* COP0 field of opcodes.
*/
#define fs1_op 0x0
#define fs2_op 0x4
#define fd1_op 0x8
#define fd2_op 0xc
/*
* FS1 opcode.
*/
enum fs1 {
fadds_op, fsubs_op, fcpynss_op, fcpyss_op,
fmadds_op, fmsubs_op, fcmovns_op, fcmovzs_op,
fnmadds_op, fnmsubs_op,
fmuls_op = 0xc, fdivs_op,
fs1_f2op_op = 0xf
};
/*
* FS1/F2OP opcode.
*/
enum fs1_f2 {
fs2d_op, fsqrts_op,
fui2s_op = 0x8, fsi2s_op = 0xc,
fs2ui_op = 0x10, fs2ui_z_op = 0x14,
fs2si_op = 0x18, fs2si_z_op = 0x1c
};
/*
* FS2 opcode.
*/
enum fs2 {
fcmpeqs_op, fcmpeqs_e_op, fcmplts_op, fcmplts_e_op,
fcmples_op, fcmples_e_op, fcmpuns_op, fcmpuns_e_op
};
/*
* FD1 opcode.
*/
enum fd1 {
faddd_op, fsubd_op, fcpynsd_op, fcpysd_op,
fmaddd_op, fmsubd_op, fcmovnd_op, fcmovzd_op,
fnmaddd_op, fnmsubd_op,
fmuld_op = 0xc, fdivd_op, fd1_f2op_op = 0xf
};
/*
* FD1/F2OP opcode.
*/
enum fd1_f2 {
fd2s_op, fsqrtd_op,
fui2d_op = 0x8, fsi2d_op = 0xc,
fd2ui_op = 0x10, fd2ui_z_op = 0x14,
fd2si_op = 0x18, fd2si_z_op = 0x1c
};
/*
* FD2 opcode.
*/
enum fd2 {
fcmpeqd_op, fcmpeqd_e_op, fcmpltd_op, fcmpltd_e_op,
fcmpled_op, fcmpled_e_op, fcmpund_op, fcmpund_e_op
};
#define NDS32Insn(x) x
#define I_OPCODE_off 25
#define NDS32Insn_OPCODE(x) (NDS32Insn(x) >> I_OPCODE_off)
#define I_OPCODE_offRt 20
#define I_OPCODE_mskRt (0x1fUL << I_OPCODE_offRt)
#define NDS32Insn_OPCODE_Rt(x) \
((NDS32Insn(x) & I_OPCODE_mskRt) >> I_OPCODE_offRt)
#define I_OPCODE_offRa 15
#define I_OPCODE_mskRa (0x1fUL << I_OPCODE_offRa)
#define NDS32Insn_OPCODE_Ra(x) \
((NDS32Insn(x) & I_OPCODE_mskRa) >> I_OPCODE_offRa)
#define I_OPCODE_offRb 10
#define I_OPCODE_mskRb (0x1fUL << I_OPCODE_offRb)
#define NDS32Insn_OPCODE_Rb(x) \
((NDS32Insn(x) & I_OPCODE_mskRb) >> I_OPCODE_offRb)
#define I_OPCODE_offbit1014 10
#define I_OPCODE_mskbit1014 (0x1fUL << I_OPCODE_offbit1014)
#define NDS32Insn_OPCODE_BIT1014(x) \
((NDS32Insn(x) & I_OPCODE_mskbit1014) >> I_OPCODE_offbit1014)
#define I_OPCODE_offbit69 6
#define I_OPCODE_mskbit69 (0xfUL << I_OPCODE_offbit69)
#define NDS32Insn_OPCODE_BIT69(x) \
((NDS32Insn(x) & I_OPCODE_mskbit69) >> I_OPCODE_offbit69)
#define I_OPCODE_offCOP0 0
#define I_OPCODE_mskCOP0 (0x3fUL << I_OPCODE_offCOP0)
#define NDS32Insn_OPCODE_COP0(x) \
((NDS32Insn(x) & I_OPCODE_mskCOP0) >> I_OPCODE_offCOP0)
#endif /* __NDS32_FPU_INST_H */
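
Illustrative decode sketch (not part of this diff): an emulator first checks the major opcode, then the COP0 sub-opcode group, then the operation bits; the exact field grouping is assumed to follow the enums above.

/* Hedged sketch: recognizing an FADDS instruction with the accessors. */
static int insn_is_fadds(unsigned int insn)
{
	if (NDS32Insn_OPCODE(insn) != cop0_op)
		return 0;			/* not a coprocessor-0 opcode */
	if (NDS32Insn_OPCODE_COP0(insn) != fs1_op)
		return 0;			/* not in the FS1 group */
	return NDS32Insn_OPCODE_BIT69(insn) == fadds_op;
}
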


@@ -0,0 +1,16 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2008-2018 Andes Technology Corporation */
#ifndef __ASM_PERF_EVENT_H
#define __ASM_PERF_EVENT_H
/*
 * This file is required by perf;
 * refer to tools/perf/design.txt for more details.
*/
struct pt_regs;
unsigned long perf_instruction_pointer(struct pt_regs *regs);
unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
#endif


@@ -0,0 +1,386 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2008-2018 Andes Technology Corporation */
#ifndef __ASM_PMU_H
#define __ASM_PMU_H
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <asm/bitfield.h>
/* Has special meaning for perf core implementation */
#define HW_OP_UNSUPPORTED 0x0
#define C(_x) PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED 0x0
/* Enough for both software and hardware defined events */
#define SOFTWARE_EVENT_MASK 0xFF
#define PFM_OFFSET_MAGIC_0 2 /* DO NOT START FROM 0 */
#define PFM_OFFSET_MAGIC_1 (PFM_OFFSET_MAGIC_0 + 36)
#define PFM_OFFSET_MAGIC_2 (PFM_OFFSET_MAGIC_1 + 36)
enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };
u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1,
PFM_CTL_mskOVF2 };
u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1,
PFM_CTL_mskEN2 };
u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1,
PFM_CTL_offSEL2 };
u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 };
u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 };
u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 };
u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 };
/*
* Perf Events' indices
*/
#define NDS32_IDX_CYCLE_COUNTER 0
#define NDS32_IDX_COUNTER0 1
#define NDS32_IDX_COUNTER1 2
/* The events for a given PMU register set. */
struct pmu_hw_events {
/*
* The events that are active on the PMU for the given index.
*/
struct perf_event *events[MAX_COUNTERS];
/*
* A 1 bit for an index indicates that the counter is being used for
* an event. A 0 means that the counter can be used.
*/
unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)];
/*
* Hardware lock to serialize accesses to PMU registers. Needed for the
* read/modify/write sequences.
*/
raw_spinlock_t pmu_lock;
};
struct nds32_pmu {
struct pmu pmu;
cpumask_t active_irqs;
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct perf_event *event);
void (*disable)(struct perf_event *event);
int (*get_event_idx)(struct pmu_hw_events *hw_events,
struct perf_event *event);
int (*set_event_filter)(struct hw_perf_event *evt,
struct perf_event_attr *attr);
u32 (*read_counter)(struct perf_event *event);
void (*write_counter)(struct perf_event *event, u32 val);
void (*start)(struct nds32_pmu *nds32_pmu);
void (*stop)(struct nds32_pmu *nds32_pmu);
void (*reset)(void *data);
int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler);
void (*free_irq)(struct nds32_pmu *nds32_pmu);
int (*map_event)(struct perf_event *event);
int num_events;
atomic_t active_events;
u64 max_period;
struct platform_device *plat_device;
struct pmu_hw_events *(*get_hw_events)(void);
};
#define to_nds32_pmu(p) (container_of(p, struct nds32_pmu, pmu))
int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type);
u64 nds32_pmu_event_update(struct perf_event *event);
int nds32_pmu_event_set_period(struct perf_event *event);
/*
* Common NDS32 SPAv3 event types
*
* Note: An implementation may not be able to count all of these events
* but the encodings are considered to be `reserved' in the case that
* they are not available.
*
 * SEL_TOTAL_CYCLES carries an offset because zero is defined as the
 * NOT_SUPPORTED event mapping in the generic perf code.
 * The offset must be handled in the event-writing implementation.
*/
enum spav3_counter_0_perf_types {
SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0, /* counting symbol */
SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0,
SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0,
SPAV3_0_SEL_LAST /* counting symbol */
};
enum spav3_counter_1_perf_types {
SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1, /* counting symbol */
SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1,
SPAV3_1_SEL_LAST /* counting symbol */
};
enum spav3_counter_2_perf_types {
SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2, /* counting symbol */
SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT =
3 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2,
SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2,
SPAV3_2_SEL_LAST /* counting symbol */
};
/* Get converted event counter index */
static inline int get_converted_event_idx(unsigned long event)
{
int idx;
if ((event) > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) {
idx = 0;
} else if ((event) > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) {
idx = 1;
} else if ((event) > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) {
idx = 2;
} else {
pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n");
return -EPERM;
}
return idx;
}
/* Get converted hardware event number */
static inline u32 get_converted_evet_hw_num(u32 event)
{
if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST)
event -= PFM_OFFSET_MAGIC_0;
else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST)
event -= PFM_OFFSET_MAGIC_1;
else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST)
event -= PFM_OFFSET_MAGIC_2;
else if (event != 0)
pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n");
return event;
}
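
Worked example (illustrative, not part of this diff): SPAV3_1_SEL_COMPLETED_INSTRUCTION is 1 + PFM_OFFSET_MAGIC_1 = 39, which falls inside counter 1's range, so the two helpers recover both the counter index and the raw selector value:

/* Hedged sketch: round-tripping one event through the helpers above. */
static void pfm_event_mapping_demo(void)
{
	unsigned long ev = SPAV3_1_SEL_COMPLETED_INSTRUCTION;	/* 1 + 38 = 39 */
	int counter = get_converted_event_idx(ev);		/* -> 1 (PFMC1) */
	u32 selector = get_converted_evet_hw_num(ev);		/* -> 1 */
}
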
/*
* NDS32 HW events mapping
*
* The hardware events that we support. We do support cache operations but
 * we have Harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION,
[PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS,
[PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED
};
static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] =
SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS,
[C(RESULT_MISS)] =
SPAV3_2_SEL_LOAD_DATA_CACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] =
SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS,
[C(RESULT_MISS)] =
SPAV3_2_SEL_STORE_DATA_CACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] =
CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] =
CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] =
SPAV3_1_SEL_CODE_CACHE_ACCESS,
[C(RESULT_MISS)] =
SPAV3_2_SEL_CODE_CACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] =
SPAV3_1_SEL_CODE_CACHE_ACCESS,
[C(RESULT_MISS)] =
SPAV3_2_SEL_CODE_CACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] =
CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
/* TODO: L2CC */
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] =
CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
/* The NDS32 PMU does not distinguish TLB read/write hits and misses;
 * it can only count accesses and misses with reads and writes mixed.
 * Therefore only the READ counters use these events.
 * We do as much as we can.
*/
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] =
SPAV3_1_SEL_UDTLB_ACCESS,
[C(RESULT_MISS)] =
SPAV3_2_SEL_UDTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] =
CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] =
CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] =
SPAV3_1_SEL_UITLB_ACCESS,
[C(RESULT_MISS)] =
SPAV3_2_SEL_UITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] =
CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] =
CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] =
CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] =
CACHE_OP_UNSUPPORTED,
},
},
	[C(BPU)] = { /* BPU: branch prediction unit; no SPAv3 events map here */
[C(OP_READ)] = {
[C(RESULT_ACCESS)] =
CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] =
CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] =
CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] =
CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] =
CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] =
CACHE_OP_UNSUPPORTED,
},
},
	[C(NODE)] = { /* NODE: NUMA node accesses; not applicable to nds32 */
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] =
CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] =
CACHE_OP_UNSUPPORTED,
},
},
};
int nds32_pmu_map_event(struct perf_event *event,
const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask);
#endif /* __ASM_PMU_H */
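
For illustration (not part of this diff), a port-specific map_event callback would delegate to nds32_pmu_map_event() with the tables above; the wrapper name is assumed:

/* Hedged sketch: wiring the mapping tables into the declared mapper. */
static int nds32_spav3_map_event(struct perf_event *event)
{
	return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
				   &nds32_pfm_perf_cache_map,
				   SOFTWARE_EVENT_MASK);
}
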

View File

@@ -35,6 +35,8 @@ struct thread_struct {
unsigned long address;
unsigned long trap_no;
unsigned long error_code;
struct fpu_struct fpu;
};
#define INIT_THREAD { }
@@ -72,6 +74,11 @@ struct task_struct;
/* Free all resources held by a thread. */
#define release_thread(thread) do { } while(0)
#if IS_ENABLED(CONFIG_FPU)
#if IS_ENABLED(CONFIG_LAZY_FPU)
extern struct task_struct *last_task_used_math;
#endif
#endif
/* Prepare to copy thread state - unlazy all lazy status */
#define prepare_to_copy(tsk) do { } while (0)


@@ -0,0 +1,158 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2005-2018 Andes Technology Corporation */
#include <asm/bitfield.h>
#define _FP_W_TYPE_SIZE 32
#define _FP_W_TYPE unsigned long
#define _FP_WS_TYPE signed long
#define _FP_I_TYPE long
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
#define _FP_MUL_MEAT_S(R, X, Y) \
_FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
#define _FP_MUL_MEAT_D(R, X, Y) \
_FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
#define _FP_MUL_MEAT_Q(R, X, Y) \
_FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q, R, X, Y, umul_ppmm)
#define _FP_MUL_MEAT_DW_S(R, X, Y) \
_FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
#define _FP_MUL_MEAT_DW_D(R, X, Y) \
_FP_MUL_MEAT_DW_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
#define _FP_DIV_MEAT_S(R, X, Y) _FP_DIV_MEAT_1_udiv_norm(S, R, X, Y)
#define _FP_DIV_MEAT_D(R, X, Y) _FP_DIV_MEAT_2_udiv(D, R, X, Y)
#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
#define _FP_NANSIGN_S 0
#define _FP_NANSIGN_D 0
#define _FP_NANSIGN_Q 0
#define _FP_KEEPNANFRACP 1
#define _FP_QNANNEGATEDP 0
#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
do { \
if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
&& !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) { \
R##_s = Y##_s; \
_FP_FRAC_COPY_##wc(R, Y); \
} else { \
R##_s = X##_s; \
_FP_FRAC_COPY_##wc(R, X); \
} \
R##_c = FP_CLS_NAN; \
} while (0)
#define __FPU_FPCSR (current->thread.fpu.fpcsr)
/* Obtain the current rounding mode. */
#define FP_ROUNDMODE \
({ \
__FPU_FPCSR & FPCSR_mskRM; \
})
#define FP_RND_NEAREST 0
#define FP_RND_PINF 1
#define FP_RND_MINF 2
#define FP_RND_ZERO 3
#define FP_EX_INVALID FPCSR_mskIVO
#define FP_EX_DIVZERO FPCSR_mskDBZ
#define FP_EX_OVERFLOW FPCSR_mskOVF
#define FP_EX_UNDERFLOW FPCSR_mskUDF
#define FP_EX_INEXACT FPCSR_mskIEX
#define SF_CEQ 2
#define SF_CLT 1
#define SF_CGT 3
#define SF_CUN 4
#include <asm/byteorder.h>
#ifdef __BIG_ENDIAN__
#define __BYTE_ORDER __BIG_ENDIAN
#define __LITTLE_ENDIAN 0
#else
#define __BYTE_ORDER __LITTLE_ENDIAN
#define __BIG_ENDIAN 0
#endif
#define abort() do { } while (0)
#define umul_ppmm(w1, w0, u, v) \
do { \
UWtype __x0, __x1, __x2, __x3; \
UHWtype __ul, __vl, __uh, __vh; \
\
__ul = __ll_lowpart(u); \
__uh = __ll_highpart(u); \
__vl = __ll_lowpart(v); \
__vh = __ll_highpart(v); \
\
__x0 = (UWtype) __ul * __vl; \
__x1 = (UWtype) __ul * __vh; \
__x2 = (UWtype) __uh * __vl; \
__x3 = (UWtype) __uh * __vh; \
\
__x1 += __ll_highpart(__x0); \
__x1 += __x2; \
if (__x1 < __x2) \
__x3 += __ll_B; \
\
(w1) = __x3 + __ll_highpart(__x1); \
(w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0); \
} while (0)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
UWtype __x; \
__x = (al) + (bl); \
(sh) = (ah) + (bh) + (__x < (al)); \
(sl) = __x; \
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
UWtype __x; \
__x = (al) - (bl); \
(sh) = (ah) - (bh) - (__x > (al)); \
(sl) = __x; \
} while (0)
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
__d1 = __ll_highpart(d); \
__d0 = __ll_lowpart(d); \
\
__r1 = (n1) % __d1; \
__q1 = (n1) / __d1; \
__m = (UWtype) __q1 * __d0; \
__r1 = __r1 * __ll_B | __ll_highpart(n0); \
if (__r1 < __m) { \
__q1--, __r1 += (d); \
if (__r1 >= (d)) \
if (__r1 < __m) \
__q1--, __r1 += (d); \
} \
__r1 -= __m; \
__r0 = __r1 % __d1; \
__q0 = __r1 / __d1; \
__m = (UWtype) __q0 * __d0; \
__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
if (__r0 < __m) { \
__q0--, __r0 += (d); \
if (__r0 >= (d)) \
if (__r0 < __m) \
__q0--, __r0 += (d); \
} \
__r0 -= __m; \
(q) = (UWtype) __q1 * __ll_B | __q0; \
(r) = __r0; \
} while (0)
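
For illustration (not part of this diff): umul_ppmm() splits each 32-bit operand into 16-bit halves, forms the four partial products, and recombines them with carry handling. A self-checking sketch, assuming UWtype comes from the including soft-fp headers as defined by _FP_W_TYPE above:

/* Hedged sketch: checking umul_ppmm() against a native 64-bit multiply. */
static int umul_ppmm_selfcheck(void)
{
	UWtype hi, lo;
	unsigned long long ref = 0x12345678ULL * 0x9abcdef0ULL;

	umul_ppmm(hi, lo, 0x12345678UL, 0x9abcdef0UL);
	return hi == (UWtype)(ref >> 32) && lo == (UWtype)ref;
}
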


@@ -0,0 +1,39 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2008-2018 Andes Technology Corporation */
#ifndef __ASM_STACKTRACE_H
#define __ASM_STACKTRACE_H
/* Kernel callchain */
struct stackframe {
unsigned long fp;
unsigned long sp;
unsigned long lp;
};
/*
 * struct frame_tail: user callchain
 * IMPORTANT:
 * This struct is used for call-stack walking, so the order and
 * types of its fields matter. Do not use an array; it would only
 * store sizeof(pointer) entries.
 *
 * For details, refer to arch/arm/kernel/perf_event.c.
*/
struct frame_tail {
unsigned long stack_fp;
unsigned long stack_lp;
};
/* For User callchain with optimize for size */
struct frame_tail_opt_size {
unsigned long stack_r6;
unsigned long stack_fp;
unsigned long stack_gp;
unsigned long stack_lp;
};
extern void
get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph);
#endif /* __ASM_STACKTRACE_H */
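
A hedged walking sketch (modelled on arch/arm/kernel/perf_event.c, which the comment above cites; user_backtrace is a hypothetical helper name): each step copies one {fp, lp} record from user memory, records the return address, and follows the saved frame pointer.

/* Hypothetical helper: consume one user frame, return the next. */
static struct frame_tail __user *
user_backtrace(struct frame_tail __user *tail,
	       struct perf_callchain_entry_ctx *entry)
{
	struct frame_tail buftail;

	if (copy_from_user(&buftail, tail, sizeof(buftail)))
		return NULL;				/* unreadable frame */

	perf_callchain_store(entry, buftail.stack_lp);	/* return address */

	/* Frame pointers must move strictly up the stack, or we stop. */
	if ((struct frame_tail __user *)buftail.stack_fp <= tail)
		return NULL;

	return (struct frame_tail __user *)buftail.stack_fp;
}
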


@@ -0,0 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2008-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_SUSPEND_H
#define __ASM_NDS32_SUSPEND_H
extern void suspend2ram(void);
extern void cpu_resume(void);
extern unsigned long wake_mask;
#endif
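
For illustration (not part of this diff), suspend2ram() is the kind of entry point that gets registered with the generic suspend core; a minimal assumed sketch:

#include <linux/errno.h>
#include <linux/suspend.h>
#include <asm/suspend.h>

/* Hedged sketch: wiring suspend2ram() into platform_suspend_ops. */
static int nds32_pm_enter(suspend_state_t state)
{
	if (state != PM_SUSPEND_MEM)
		return -EINVAL;
	suspend2ram();		/* save state, suspend to RAM, resume here */
	return 0;
}

static const struct platform_suspend_ops nds32_pm_ops = {
	.valid = suspend_valid_only_mem,
	.enter = nds32_pm_enter,
};
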


@@ -7,6 +7,7 @@
asmlinkage long sys_cacheflush(unsigned long addr, unsigned long len, unsigned int op);
asmlinkage long sys_fadvise64_64_wrapper(int fd, int advice, loff_t offset, loff_t len);
asmlinkage long sys_rt_sigreturn_wrapper(void);
asmlinkage long sys_udftrap(int option);
#include <asm-generic/syscalls.h>
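
Userspace would reach sys_udftrap() through the raw syscall interface; a hedged sketch, assuming the nds32 uapi headers export __NR_udftrap and leaving the option semantics to the kernel implementation:

#include <unistd.h>
#include <sys/syscall.h>

/* Hedged sketch: toggling the FPU underflow trap from userspace.
 * __NR_udftrap and the meaning of 'option' are taken on the kernel's
 * terms; this only shows the calling convention. */
long set_udftrap(int option)
{
	return syscall(__NR_udftrap, option);
}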