 3193c0836f
			
		
	
	3193c0836f
	
	
	
		
			
			On x86-64, with CONFIG_RETPOLINE=n, GCC's "global common subexpression
elimination" optimization results in ___bpf_prog_run()'s jumptable code
changing from this:
	select_insn:
		jmp *jumptable(, %rax, 8)
		...
	ALU64_ADD_X:
		...
		jmp *jumptable(, %rax, 8)
	ALU_ADD_X:
		...
		jmp *jumptable(, %rax, 8)
to this:
	select_insn:
		mov jumptable, %r12
		jmp *(%r12, %rax, 8)
		...
	ALU64_ADD_X:
		...
		jmp *(%r12, %rax, 8)
	ALU_ADD_X:
		...
		jmp *(%r12, %rax, 8)
The jumptable address is placed in a register once, at the beginning of
the function.  The function execution can then go through multiple
indirect jumps which rely on that same register value.  This has a few
issues:
1) Objtool isn't smart enough to be able to track such a register value
   across multiple recursive indirect jumps through the jump table.
2) With CONFIG_RETPOLINE enabled, this optimization actually results in
   a small slowdown.  I measured a ~4.7% slowdown in the test_bpf
   "tcpdump port 22" selftest.
   This slowdown is actually predicted by the GCC manual:
     Note: When compiling a program using computed gotos, a GCC
     extension, you may get better run-time performance if you
     disable the global common subexpression elimination pass by
     adding -fno-gcse to the command line.
So just disable the optimization for this function.
Fixes: e55a73251d ("bpf: Fix ORC unwinding in non-JIT BPF code")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/30c3ca29ba037afcbd860a8672eef0021addf9fe.1563413318.git.jpoimboe@redhat.com
		
	
		
			
				
	
	
		
			175 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			175 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* SPDX-License-Identifier: GPL-2.0 */
 | |
| #ifndef __LINUX_COMPILER_TYPES_H
 | |
| #error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
 | |
| #endif
 | |
| 
 | |
| /*
 | |
|  * Common definitions for all gcc versions go here.
 | |
|  *
 | |
|  * GCC_VERSION folds the major, minor and patchlevel numbers of the
 | |
|  * compiler into one integer (major * 10000 + minor * 100 + patchlevel),
 | |
|  * e.g. 40600 for gcc 4.6.0. Anything older than that fails the build.
 | |
|  */
 | |
| #define GCC_VERSION (__GNUC__ * 10000		\
 | |
| 		     + __GNUC_MINOR__ * 100	\
 | |
| 		     + __GNUC_PATCHLEVEL__)
 | |
| 
 | |
| #if GCC_VERSION < 40600
 | |
| # error Sorry, your compiler is too old - please upgrade it.
 | |
| #endif
 | |
| 
 | |
| /*
 | |
|  * Optimization barrier: an empty asm statement with a "memory" clobber,
 | |
|  * so gcc has to assume that any memory may have been read or written.
 | |
|  */
 | |
| 
 | |
| /* The "volatile" is due to gcc bugs */
 | |
| #define barrier() __asm__ __volatile__("": : :"memory")
 | |
| /*
 | |
|  * barrier_data() is a barrier() that additionally takes @ptr as an asm
 | |
|  * input. It exists, e.g., to prevent dead store elimination on @ptr,
 | |
|  * where gcc and llvm may behave differently when only a normal
 | |
|  * barrier() is used: gcc gets along with a normal barrier(), whereas
 | |
|  * llvm needs an explicit input variable to be assumed clobbered.
 | |
|  * The issue is as follows: while the inline asm might access any
 | |
|  * memory it wants, the compiler could have fit all of @ptr into
 | |
|  * registers instead, and since @ptr never escaped from that, it
 | |
|  * proved that the inline asm wasn't touching any of it. This version
 | |
|  * works well with both compilers, i.e. we're telling the compiler
 | |
|  * that the inline asm absolutely may see the contents of @ptr.
 | |
|  * See also: https://llvm.org/bugs/show_bug.cgi?id=15495
 | |
|  */
 | |
| #define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
 | |
| 
 | |
| /*
 | |
|  * This macro obfuscates arithmetic on a variable address so that gcc
 | |
|  * cannot recognize the original variable and make assumptions about it.
 | |
|  *
 | |
|  * This is needed because the C standard makes it undefined to do
 | |
|  * pointer arithmetic on "objects" outside their boundaries and the
 | |
|  * gcc optimizers assume this is the case. In particular they
 | |
|  * assume such arithmetic does not wrap.
 | |
|  *
 | |
|  * A miscompilation has been observed because of this on PPC.
 | |
|  * To work around it we hide the relationship of the pointer and the object
 | |
|  * using this macro: @ptr is passed through an empty asm into an
 | |
|  * unsigned long, @off is added to that value, and the sum is cast
 | |
|  * back to typeof(ptr).
 | |
|  *
 | |
|  * Versions of the ppc64 compiler before 4.1 had a bug where use of
 | |
|  * RELOC_HIDE could trash r30. The bug can be worked around by changing
 | |
|  * the inline assembly constraint from =g to =r; in this particular
 | |
|  * case either is valid, and =r is what the macro uses.
 | |
|  */
 | |
| #define RELOC_HIDE(ptr, off)						\
 | |
| ({									\
 | |
| 	unsigned long __ptr;						\
 | |
| 	__asm__ ("" : "=r"(__ptr) : "0"(ptr));				\
 | |
| 	(typeof(ptr)) (__ptr + (off));					\
 | |
| })
 | |
| 
 | |
| /*
 | |
|  * A trick to suppress an uninitialized variable warning without
 | |
|  * generating any code: the variable is initialized with itself.
 | |
|  */
 | |
| #define uninitialized_var(x) x = x
 | |
| /* indirect_branch("keep") function attribute; only defined with CONFIG_RETPOLINE. */
 | |
| #ifdef CONFIG_RETPOLINE
 | |
| #define __noretpoline __attribute__((__indirect_branch__("keep")))
 | |
| #endif
 | |
| /* Pastes __UNIQUE_ID_, @prefix and __COUNTER__ together via __PASTE (defined elsewhere). */
 | |
| #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 | |
| /* Size of @obj as reported by __builtin_object_size() with type 0. */
 | |
| #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 | |
| /* Wrappers for gcc's __warning__ and __error__ attributes; @message is passed through. */
 | |
| #define __compiletime_warning(message) __attribute__((__warning__(message)))
 | |
| #define __compiletime_error(message) __attribute__((__error__(message)))
 | |
| /* latent_entropy attribute; needs LATENT_ENTROPY_PLUGIN and is left undefined under __CHECKER__. */
 | |
| #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
 | |
| #define __latent_entropy __attribute__((latent_entropy))
 | |
| #endif
 | |
| 
 | |
| /*
 | |
|  * Calls to noreturn functions, __builtin_unreachable() and
 | |
|  * __builtin_trap() confuse the stack allocation in gcc, leading to
 | |
|  * overly large stack frames; see
 | |
|  * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365
 | |
|  *
 | |
|  * Placing an empty inline assembly statement before them works around
 | |
|  * the problem.
 | |
|  */
 | |
| #define barrier_before_unreachable() asm volatile("")
 | |
| 
 | |
| /*
 | |
|  * Mark a position in code as unreachable.  This can be used to
 | |
|  * suppress control flow warnings after asm blocks that transfer
 | |
|  * control elsewhere.
 | |
|  *
 | |
|  * The expansion runs annotate_unreachable() (defined elsewhere), then
 | |
|  * barrier_before_unreachable(), then __builtin_unreachable().
 | |
|  */
 | |
| #define unreachable() \
 | |
| 	do {					\
 | |
| 		annotate_unreachable();		\
 | |
| 		barrier_before_unreachable();	\
 | |
| 		__builtin_unreachable();	\
 | |
| 	} while (0)
 | |
| /* Struct layout randomization; needs RANDSTRUCT_PLUGIN and is left undefined under __CHECKER__. */
 | |
| #if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
 | |
| #define __randomize_layout __attribute__((randomize_layout))
 | |
| #define __no_randomize_layout __attribute__((no_randomize_layout))
 | |
| /* This anon struct can add padding, so only enable it under randstruct. */
 | |
| #define randomized_struct_fields_start	struct {
 | |
| #define randomized_struct_fields_end	} __randomize_layout;
 | |
| #endif
 | |
| 
 | |
| /*
 | |
|  * GCC 'asm goto' miscompiles certain code sequences:
 | |
|  *
 | |
|  *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
 | |
|  *
 | |
|  * Work around it via a compiler barrier quirk suggested by Jakub Jelinek:
 | |
|  * an empty asm statement is emitted right after the asm goto.
 | |
|  *
 | |
|  * (asm goto is automatically volatile - the naming reflects this.)
 | |
|  */
 | |
| #define asm_volatile_goto(x...)	do { asm goto(x); asm (""); } while (0)
 | |
| 
 | |
| /*
 | |
|  * sparse (__CHECKER__) pretends to be gcc, but can't do constant
 | |
|  * folding in __builtin_bswap*() (yet), so don't set these for it.
 | |
|  *
 | |
|  * The 32-bit and 64-bit flags are set whenever
 | |
|  * CONFIG_ARCH_USE_BUILTIN_BSWAP is defined; the 16-bit flag is
 | |
|  * additionally limited to gcc 4.8.0 and newer.
 | |
|  */
 | |
| #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) && !defined(__CHECKER__)
 | |
| #define __HAVE_BUILTIN_BSWAP32__
 | |
| #define __HAVE_BUILTIN_BSWAP64__
 | |
| #if GCC_VERSION >= 40800
 | |
| #define __HAVE_BUILTIN_BSWAP16__
 | |
| #endif
 | |
| #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */
 | |
| /* KASAN ABI version by compiler: 5 from gcc 7.0.0, 4 from 5.0.0, 3 from 4.9.2; undefined below that. */
 | |
| #if GCC_VERSION >= 70000
 | |
| #define KASAN_ABI_VERSION 5
 | |
| #elif GCC_VERSION >= 50000
 | |
| #define KASAN_ABI_VERSION 4
 | |
| #elif GCC_VERSION >= 40902
 | |
| #define KASAN_ABI_VERSION 3
 | |
| #endif
 | |
| /* The no_sanitize_address attribute when the compiler reports having it, otherwise empty. */
 | |
| #if __has_attribute(__no_sanitize_address__)
 | |
| #define __no_sanitize_address __attribute__((no_sanitize_address))
 | |
| #else
 | |
| #define __no_sanitize_address
 | |
| #endif
 | |
| /*
 | |
|  * Defined to 1 from gcc 5.1.0 on.
 | |
|  * NOTE(review): presumably advertises the type-generic overflow
 | |
|  * builtins to the rest of the kernel - confirm against the users.
 | |
|  */
 | |
| #if GCC_VERSION >= 50100
 | |
| #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 | |
| #endif
 | |
| 
 | |
| /*
 | |
|  * Turn individual warnings and errors on and off locally, depending
 | |
|  * on version.
 | |
|  *
 | |
|  * __diag_GCC(version, severity, s) expands to
 | |
|  * __diag_GCC_<version>(__diag_GCC_<severity> s). The only version
 | |
|  * hook defined here is __diag_GCC_8: from gcc 8.0.0 on it emits a
 | |
|  * "GCC diagnostic" pragma, on older compilers it expands to nothing.
 | |
|  */
 | |
| #define __diag_GCC(version, severity, s) \
 | |
| 	__diag_GCC_ ## version(__diag_GCC_ ## severity s)
 | |
| 
 | |
| /* Severity used in pragma directives */
 | |
| #define __diag_GCC_ignore	ignored
 | |
| #define __diag_GCC_warn		warning
 | |
| #define __diag_GCC_error	error
 | |
| 
 | |
| #define __diag_str1(s)		#s
 | |
| #define __diag_str(s)		__diag_str1(s)
 | |
| #define __diag(s)		_Pragma(__diag_str(GCC diagnostic s))
 | |
| 
 | |
| #if GCC_VERSION >= 80000
 | |
| #define __diag_GCC_8(s)		__diag(s)
 | |
| #else
 | |
| #define __diag_GCC_8(s)
 | |
| #endif
 | |
| /*
 | |
|  * Function attribute that turns off gcc's global common subexpression
 | |
|  * elimination pass (-fno-gcse) for the annotated function.
 | |
|  * NOTE(review): the gcc manual describes the optimize attribute as a
 | |
|  * debugging aid - confirm it does not drop other command-line options
 | |
|  * for the annotated function.
 | |
|  */
 | |
| #define __no_fgcse __attribute__((optimize("-fno-gcse")))
 |