bitops.h

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_BITOPS_H
#define _ASM_X86_BITOPS_H

/*
 * Copyright 1992, Linus Torvalds.
 *
 * Note: inlines with more than a single statement should be marked
 * __always_inline to avoid problems with older gcc's inlining heuristics.
 */

#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif

#include <linux/compiler.h>
#include <asm/alternative.h>
#include <asm/rmwcc.h>
#include <asm/barrier.h>

#if BITS_PER_LONG == 32
# define _BITOPS_LONG_SHIFT 5
#elif BITS_PER_LONG == 64
# define _BITOPS_LONG_SHIFT 6
#else
# error "Unexpected BITS_PER_LONG"
#endif

#define BIT_64(n)               (U64_C(1) << (n))

/*
 * These have to be done with inline assembly: that way the bit-setting
 * is guaranteed to be atomic. All bit operations return 0 if the bit
 * was cleared before the operation and != 0 if it was not.
 *
 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
 */

#define RLONG_ADDR(x)            "m" (*(volatile long *) (x))
#define WBYTE_ADDR(x)           "+m" (*(volatile char *) (x))

#define ADDR                    RLONG_ADDR(addr)
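
/*
 * Example of the bit numbering above: on a 32-bit build,
 *
 *      set_bit(32, addr);
 *
 * operates on bit 0 of addr[1]; because x86 is little-endian, that is
 * the same byte a mask operation on ((char *)addr)[4] would touch,
 * which is what the CONST_MASK_*() helpers below rely on.
 */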

/*
 * We do the locked ops that don't return the old value as
 * a mask operation on a byte.
 */
#define CONST_MASK_ADDR(nr, addr)       WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr)                  (1 << ((nr) & 7))
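
/*
 * For a compile-time constant nr these reduce to a single byte operand,
 * e.g. nr == 12 gives:
 *
 *      CONST_MASK_ADDR(12, addr)  ->  "+m" of ((volatile char *)addr)[1]
 *      CONST_MASK(12)             ->  1 << 4 == 0x10
 *
 * so arch_set_bit(12, addr) below becomes one locked "orb" of 0x10 into
 * the second byte of *addr.
 */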

static __always_inline void
arch_set_bit(long nr, volatile unsigned long *addr)
{
        if (__builtin_constant_p(nr)) {
                asm volatile(LOCK_PREFIX "orb %b1,%0"
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" (CONST_MASK(nr))
                        : "memory");
        } else {
                asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
                        : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
        }
}
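
/*
 * Example: a constant bit number takes the byte-wide "orb" path, a
 * runtime one takes the "bts" path; both are fully atomic:
 *
 *      arch_set_bit(5, &word);         // lock orb $0x20 into byte 0
 *      arch_set_bit(nr, &word);        // lock bts %nr, word
 */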

static __always_inline void
arch___set_bit(unsigned long nr, volatile unsigned long *addr)
{
        asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}

static __always_inline void
arch_clear_bit(long nr, volatile unsigned long *addr)
{
        if (__builtin_constant_p(nr)) {
                asm volatile(LOCK_PREFIX "andb %b1,%0"
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" (~CONST_MASK(nr)));
        } else {
                asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
                        : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
        }
}

static __always_inline void
arch_clear_bit_unlock(long nr, volatile unsigned long *addr)
{
        barrier();
        arch_clear_bit(nr, addr);
}

static __always_inline void
arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
{
        asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}

static __always_inline bool
arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
{
        bool negative;

        asm volatile(LOCK_PREFIX "andb %2,%1"
                CC_SET(s)
                : CC_OUT(s) (negative), WBYTE_ADDR(addr)
                : "ir" ((char) ~(1 << nr)) : "memory");
        return negative;
}
#define arch_clear_bit_unlock_is_negative_byte \
        arch_clear_bit_unlock_is_negative_byte

static __always_inline void
arch___clear_bit_unlock(long nr, volatile unsigned long *addr)
{
        arch___clear_bit(nr, addr);
}

static __always_inline void
arch___change_bit(unsigned long nr, volatile unsigned long *addr)
{
        asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}

static __always_inline void
arch_change_bit(long nr, volatile unsigned long *addr)
{
        if (__builtin_constant_p(nr)) {
                asm volatile(LOCK_PREFIX "xorb %b1,%0"
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" (CONST_MASK(nr)));
        } else {
                asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
                        : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
        }
}

static __always_inline bool
arch_test_and_set_bit(long nr, volatile unsigned long *addr)
{
        return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
}

static __always_inline bool
arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
        return arch_test_and_set_bit(nr, addr);
}

static __always_inline bool
arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
{
        bool oldbit;

        asm(__ASM_SIZE(bts) " %2,%1"
            CC_SET(c)
            : CC_OUT(c) (oldbit)
            : ADDR, "Ir" (nr) : "memory");
        return oldbit;
}

static __always_inline bool
arch_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
        return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
}

/*
 * Note: the operation is performed atomically with respect to
 * the local CPU, but not other CPUs. Portable code should not
 * rely on this behaviour.
 * KVM relies on this behaviour on x86 for modifying memory that is also
 * accessed from a hypervisor on the same CPU if running in a VM: don't change
 * this without also updating arch/x86/kernel/kvm.c
 */
static __always_inline bool
arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
{
        bool oldbit;

        asm volatile(__ASM_SIZE(btr) " %2,%1"
                     CC_SET(c)
                     : CC_OUT(c) (oldbit)
                     : ADDR, "Ir" (nr) : "memory");
        return oldbit;
}

static __always_inline bool
arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
{
        bool oldbit;

        asm volatile(__ASM_SIZE(btc) " %2,%1"
                     CC_SET(c)
                     : CC_OUT(c) (oldbit)
                     : ADDR, "Ir" (nr) : "memory");
        return oldbit;
}

static __always_inline bool
arch_test_and_change_bit(long nr, volatile unsigned long *addr)
{
        return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
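
/*
 * The test_and_*() variants return the old value of the bit, so the
 * usual "claim a slot" pattern is, e.g.:
 *
 *      if (arch_test_and_set_bit(0, &flags))
 *              return -EBUSY;  // bit was already set, someone else owns it
 */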

static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
{
        return ((1UL << (nr & (BITS_PER_LONG-1))) &
                (addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}

static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr)
{
        bool oldbit;

        asm volatile("testb %2,%1"
                     CC_SET(nz)
                     : CC_OUT(nz) (oldbit)
                     : "m" (((unsigned char *)addr)[nr >> 3]),
                       "i" (1 << (nr & 7))
                     : "memory");
        return oldbit;
}

static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr)
{
        bool oldbit;

        asm volatile(__ASM_SIZE(bt) " %2,%1"
                     CC_SET(c)
                     : CC_OUT(c) (oldbit)
                     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
        return oldbit;
}

static __always_inline bool
arch_test_bit(unsigned long nr, const volatile unsigned long *addr)
{
        return __builtin_constant_p(nr) ? constant_test_bit(nr, addr) :
                                          variable_test_bit(nr, addr);
}

static __always_inline bool
arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
{
        return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) :
                                          variable_test_bit(nr, addr);
}
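
/*
 * Worked example of the constant path: on a 64-bit build,
 * constant_test_bit(70, addr) reads addr[70 >> 6] == addr[1] and tests
 * mask 1UL << (70 & 63) == 1UL << 6, i.e. bit 6 of the second long;
 * a non-constant nr is handled by the "bt" instruction instead.
 */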

static __always_inline unsigned long variable__ffs(unsigned long word)
{
        asm("rep; bsf %1,%0"
                : "=r" (word)
                : "rm" (word));
        return word;
}

/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
#define __ffs(word)                             \
        (__builtin_constant_p(word) ?           \
         (unsigned long)__builtin_ctzl(word) :  \
         variable__ffs(word))
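
/*
 * __ffs() counts from 0, e.g. __ffs(0x10) == 4 and __ffs(1) == 0;
 * passing 0 is undefined (the constant path hits __builtin_ctzl(0)).
 */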

static __always_inline unsigned long variable_ffz(unsigned long word)
{
        asm("rep; bsf %1,%0"
                : "=r" (word)
                : "r" (~word));
        return word;
}

/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
#define ffz(word)                               \
        (__builtin_constant_p(word) ?           \
         (unsigned long)__builtin_ctzl(~word) : \
         variable_ffz(word))
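
/*
 * ffz() is simply __ffs() of the complement, e.g. ffz(0x0f) == 4 and
 * ffz(0) == 0; ffz(~0UL) is undefined, as noted above.
 */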

/*
 * __fls: find last set bit in word
 * @word: The word to search
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static __always_inline unsigned long __fls(unsigned long word)
{
        asm("bsr %1,%0"
            : "=r" (word)
            : "rm" (word));
        return word;
}
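
/*
 * Like __ffs(), __fls() counts from 0: __fls(0x10) == 4, __fls(1) == 0,
 * and on a 64-bit build __fls(~0UL) == 63.
 */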

#undef ADDR

#ifdef __KERNEL__
static __always_inline int variable_ffs(int x)
{
        int r;

#ifdef CONFIG_X86_64
        /*
         * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
         * dest reg is undefined if x==0, but their CPU architect says its
         * value is written to set it to the same as before, except that the
         * top 32 bits will be cleared.
         *
         * We cannot do this on 32 bits because at the very least some
         * 486 CPUs did not behave this way.
         */
        asm("bsfl %1,%0"
            : "=r" (r)
            : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
        asm("bsfl %1,%0\n\t"
            "cmovzl %2,%0"
            : "=&r" (r) : "rm" (x), "r" (-1));
#else
        asm("bsfl %1,%0\n\t"
            "jnz 1f\n\t"
            "movl $-1,%0\n"
            "1:" : "=r" (r) : "rm" (x));
#endif
        return r + 1;
}

/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines, therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 */
#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x))
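
/*
 * Unlike __ffs(), ffs() is 1-based and handles zero, e.g.
 * ffs(0) == 0, ffs(1) == 1 and ffs(0x10) == 5; the -1 seed of the
 * destination (preloaded register, cmov, or branch, depending on
 * configuration) is what turns the x == 0 case into a return of 0.
 */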

/**
 * fls - find last set bit in word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffs, but returns the position of the most significant set bit.
 *
 * fls(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 32.
 */
static __always_inline int fls(unsigned int x)
{
        int r;

#ifdef CONFIG_X86_64
        /*
         * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
         * dest reg is undefined if x==0, but their CPU architect says its
         * value is written to set it to the same as before, except that the
         * top 32 bits will be cleared.
         *
         * We cannot do this on 32 bits because at the very least some
         * 486 CPUs did not behave this way.
         */
        asm("bsrl %1,%0"
            : "=r" (r)
            : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
        asm("bsrl %1,%0\n\t"
            "cmovzl %2,%0"
            : "=&r" (r) : "rm" (x), "rm" (-1));
#else
        asm("bsrl %1,%0\n\t"
            "jnz 1f\n\t"
            "movl $-1,%0\n"
            "1:" : "=r" (r) : "rm" (x));
#endif
        return r + 1;
}
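
/*
 * fls() is 1-based as well: fls(0) == 0, fls(1) == 1 and
 * fls(0x80000000) == 32, i.e. fls(x) == __fls(x) + 1 for nonzero x.
 */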

/**
 * fls64 - find last set bit in a 64-bit word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffsll, but returns the position of the most significant set bit.
 *
 * fls64(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 64.
 */
#ifdef CONFIG_X86_64
static __always_inline int fls64(__u64 x)
{
        int bitpos = -1;
        /*
         * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
         * dest reg is undefined if x==0, but their CPU architect says its
         * value is written to set it to the same as before.
         */
        asm("bsrq %1,%q0"
            : "+r" (bitpos)
            : "rm" (x));
        return bitpos + 1;
}
#else
#include <asm-generic/bitops/fls64.h>
#endif
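
/*
 * As with fls(): fls64(0) == 0 (the -1 preload of bitpos survives the
 * BSRQ), fls64(1) == 1 and fls64(1ULL << 63) == 64.
 */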

#include <asm-generic/bitops/sched.h>

#include <asm/arch_hweight.h>

#include <asm-generic/bitops/const_hweight.h>

#include <asm-generic/bitops/instrumented-atomic.h>
#include <asm-generic/bitops/instrumented-non-atomic.h>
#include <asm-generic/bitops/instrumented-lock.h>

#include <asm-generic/bitops/le.h>

#include <asm-generic/bitops/ext2-atomic-setbit.h>

#endif /* __KERNEL__ */
#endif /* _ASM_X86_BITOPS_H */