Files
android_kernel_xiaomi_sm8450/arch/x86/include/asm/smp.h
Peter Zijlstra 9ed7d75b2f x86/percpu: Relax smp_processor_id()
Nadav reported that since this_cpu_read() became asm-volatile, many
smp_processor_id() users generated worse code due to the extra
constraints.

However since smp_processor_id() is reading a stable value, we can use
__this_cpu_read().

While this does reduce text size somewhat, this mostly results in code
movement to .text.unlikely as a result of more/larger .cold.
subfunctions. Less text on the hotpath is good for I$.

  $ ./compare.sh defconfig-build1 defconfig-build2 vmlinux.o
  setup_APIC_ibs                                             90         98   -12,+20
  force_ibs_eilvt_setup                                     400        413   -57,+70
  pci_serr_error                                            109        104   -54,+49
  pci_serr_error                                            109        104   -54,+49
  unknown_nmi_error                                         125        120   -76,+71
  unknown_nmi_error                                         125        120   -76,+71
  io_check_error                                            125        132   -97,+104
  intel_thermal_interrupt                                   730        822   +92,+0
  intel_init_thermal                                        951        945   -6,+0
  generic_get_mtrr                                          301        294   -7,+0
  generic_get_mtrr                                          301        294   -7,+0
  generic_set_all                                           749        754   -44,+49
  get_fixed_ranges                                          352        360   -41,+49
  x86_acpi_suspend_lowlevel                                 369        363   -6,+0
  check_tsc_sync_source                                     412        412   -71,+71
  irq_migrate_all_off_this_cpu                              662        674   -14,+26
  clocksource_watchdog                                      748        748   -113,+113
  __perf_event_account_interrupt                            204        197   -7,+0
  attempt_merge                                            1748       1741   -7,+0
  intel_guc_send_ct                                        1424       1409   -15,+0
  __fini_doorbell                                           235        231   -4,+0
  bdw_set_cdclk                                             928        923   -5,+0
  gen11_dsi_disable                                        1571       1556   -15,+0
  gmbus_wait                                                493        488   -5,+0
  md_make_request                                           376        369   -7,+0
  __split_and_process_bio                                   543        536   -7,+0
  delay_tsc                                                  96         89   -7,+0
  hsw_disable_pc8                                           696        691   -5,+0
  tsc_verify_tsc_adjust                                     215        228   -22,+35
  cpuidle_driver_unref                                       56         49   -7,+0
  blk_account_io_completion                                 159        148   -11,+0
  mtrr_wrmsr                                                 95         99   -29,+33
  __intel_wait_for_register_fw                              401        419   +18,+0
  cpuidle_driver_ref                                         43         36   -7,+0
  cpuidle_get_driver                                         15          8   -7,+0
  blk_account_io_done                                       535        528   -7,+0
  irq_migrate_all_off_this_cpu                              662        674   -14,+26
  check_tsc_sync_source                                     412        412   -71,+71
  irq_wait_for_poll                                         170        163   -7,+0
  generic_end_io_acct                                       329        322   -7,+0
  x86_acpi_suspend_lowlevel                                 369        363   -6,+0
  nohz_balance_enter_idle                                   198        191   -7,+0
  generic_start_io_acct                                     254        247   -7,+0
  blk_account_io_start                                      341        334   -7,+0
  perf_event_task_tick                                      682        675   -7,+0
  intel_init_thermal                                        951        945   -6,+0
  amd_e400_c1e_apic_setup                                    47         51   -28,+32
  setup_APIC_eilvt                                          350        328   -22,+0
  hsw_enable_pc8                                           1611       1605   -6,+0
                                               total   12985947   12985892   -994,+939

Reported-by: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2019-06-17 12:43:41 +02:00

200 lines
5.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_SMP_H
#define _ASM_X86_SMP_H
#ifndef __ASSEMBLY__
#include <linux/cpumask.h>
#include <asm/percpu.h>
/*
* We need the APIC definitions automatically as part of 'smp.h'
*/
#ifdef CONFIG_X86_LOCAL_APIC
# include <asm/mpspec.h>
# include <asm/apic.h>
# ifdef CONFIG_X86_IO_APIC
# include <asm/io_apic.h>
# endif
#endif
#include <asm/thread_info.h>
#include <asm/cpumask.h>
extern int smp_num_siblings;
extern unsigned int num_processors;
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
/* cpus sharing the last level cache: */
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
static inline struct cpumask *cpu_llc_shared_mask(int cpu)
{
return per_cpu(cpu_llc_shared_map, cpu);
}
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
#endif
struct task_struct;
struct smp_ops {
void (*smp_prepare_boot_cpu)(void);
void (*smp_prepare_cpus)(unsigned max_cpus);
void (*smp_cpus_done)(unsigned max_cpus);
void (*stop_other_cpus)(int wait);
void (*crash_stop_other_cpus)(void);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
void (*play_dead)(void);
void (*send_call_func_ipi)(const struct cpumask *mask);
void (*send_call_func_single_ipi)(int cpu);
};
/* Globals due to paravirt */
extern void set_cpu_sibling_map(int cpu);
#ifdef CONFIG_SMP
extern struct smp_ops smp_ops;
static inline void smp_send_stop(void)
{
smp_ops.stop_other_cpus(0);
}
static inline void stop_other_cpus(void)
{
smp_ops.stop_other_cpus(1);
}
static inline void smp_prepare_boot_cpu(void)
{
smp_ops.smp_prepare_boot_cpu();
}
static inline void smp_prepare_cpus(unsigned int max_cpus)
{
smp_ops.smp_prepare_cpus(max_cpus);
}
static inline void smp_cpus_done(unsigned int max_cpus)
{
smp_ops.smp_cpus_done(max_cpus);
}
static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
return smp_ops.cpu_up(cpu, tidle);
}
static inline int __cpu_disable(void)
{
return smp_ops.cpu_disable();
}
static inline void __cpu_die(unsigned int cpu)
{
smp_ops.cpu_die(cpu);
}
static inline void play_dead(void)
{
smp_ops.play_dead();
}
static inline void smp_send_reschedule(int cpu)
{
smp_ops.smp_send_reschedule(cpu);
}
static inline void arch_send_call_function_single_ipi(int cpu)
{
smp_ops.send_call_func_single_ipi(cpu);
}
static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
smp_ops.send_call_func_ipi(mask);
}
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void);
int common_cpu_die(unsigned int cpu);
void native_cpu_die(unsigned int cpu);
void hlt_play_dead(void);
void native_play_dead(void);
void play_dead_common(void);
void wbinvd_on_cpu(int cpu);
int wbinvd_on_all_cpus(void);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
void smp_store_boot_cpu_info(void);
void smp_store_cpu_info(int id);
asmlinkage __visible void smp_reboot_interrupt(void);
__visible void smp_reschedule_interrupt(struct pt_regs *regs);
__visible void smp_call_function_interrupt(struct pt_regs *regs);
__visible void smp_call_function_single_interrupt(struct pt_regs *r);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
#define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu)
/*
* This function is needed by all SMP systems. It must _always_ be valid
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
#define raw_smp_processor_id() this_cpu_read(cpu_number)
#define __smp_processor_id() __this_cpu_read(cpu_number)
#ifdef CONFIG_X86_32
extern int safe_smp_processor_id(void);
#else
# define safe_smp_processor_id() smp_processor_id()
#endif
#else /* !CONFIG_SMP */
#define wbinvd_on_cpu(cpu) wbinvd()
static inline int wbinvd_on_all_cpus(void)
{
wbinvd();
return 0;
}
#endif /* CONFIG_SMP */
extern unsigned disabled_cpus;
#ifdef CONFIG_X86_LOCAL_APIC
extern int hard_smp_processor_id(void);
#else /* CONFIG_X86_LOCAL_APIC */
#define hard_smp_processor_id() 0
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_DEBUG_NMI_SELFTEST
extern void nmi_selftest(void);
#else
#define nmi_selftest() do { } while (0)
#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_SMP_H */