sp_maddf.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * IEEE754 floating point arithmetic
  4. * single precision: MADDF.f (Fused Multiply Add)
  5. * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
  6. *
  7. * MIPS floating point support
  8. * Copyright (C) 2015 Imagination Technologies, Ltd.
  9. * Author: Markos Chandras <[email protected]>
  10. */
  11. #include "ieee754sp.h"
  12. static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
  13. union ieee754sp y, enum maddf_flags flags)
  14. {
  15. int re;
  16. int rs;
  17. unsigned int rm;
  18. u64 rm64;
  19. u64 zm64;
  20. int s;
  21. COMPXSP;
  22. COMPYSP;
  23. COMPZSP;
  24. EXPLODEXSP;
  25. EXPLODEYSP;
  26. EXPLODEZSP;
  27. FLUSHXSP;
  28. FLUSHYSP;
  29. FLUSHZSP;
  30. ieee754_clearcx();
  31. rs = xs ^ ys;
  32. if (flags & MADDF_NEGATE_PRODUCT)
  33. rs ^= 1;
  34. if (flags & MADDF_NEGATE_ADDITION)
  35. zs ^= 1;
  36. /*
  37. * Handle the cases when at least one of x, y or z is a NaN.
  38. * Order of precedence is sNaN, qNaN and z, x, y.
  39. */
  40. if (zc == IEEE754_CLASS_SNAN)
  41. return ieee754sp_nanxcpt(z);
  42. if (xc == IEEE754_CLASS_SNAN)
  43. return ieee754sp_nanxcpt(x);
  44. if (yc == IEEE754_CLASS_SNAN)
  45. return ieee754sp_nanxcpt(y);
  46. if (zc == IEEE754_CLASS_QNAN)
  47. return z;
  48. if (xc == IEEE754_CLASS_QNAN)
  49. return x;
  50. if (yc == IEEE754_CLASS_QNAN)
  51. return y;
  52. if (zc == IEEE754_CLASS_DNORM)
  53. SPDNORMZ;
  54. /* ZERO z cases are handled separately below */
  55. switch (CLPAIR(xc, yc)) {
  56. /*
  57. * Infinity handling
  58. */
  59. case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
  60. case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
  61. ieee754_setcx(IEEE754_INVALID_OPERATION);
  62. return ieee754sp_indef();
  63. case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
  64. case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
  65. case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
  66. case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
  67. case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
  68. if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
  69. /*
  70. * Cases of addition of infinities with opposite signs
  71. * or subtraction of infinities with same signs.
  72. */
  73. ieee754_setcx(IEEE754_INVALID_OPERATION);
  74. return ieee754sp_indef();
  75. }
  76. /*
  77. * z is here either not an infinity, or an infinity having the
  78. * same sign as product (x*y). The result must be an infinity,
  79. * and its sign is determined only by the sign of product (x*y).
  80. */
  81. return ieee754sp_inf(rs);
  82. case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
  83. case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
  84. case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
  85. case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
  86. case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
  87. if (zc == IEEE754_CLASS_INF)
  88. return ieee754sp_inf(zs);
  89. if (zc == IEEE754_CLASS_ZERO) {
  90. /* Handle cases +0 + (-0) and similar ones. */
  91. if (zs == rs)
  92. /*
  93. * Cases of addition of zeros of equal signs
  94. * or subtraction of zeroes of opposite signs.
  95. * The sign of the resulting zero is in any
  96. * such case determined only by the sign of z.
  97. */
  98. return z;
  99. return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
  100. }
  101. /* x*y is here 0, and z is not 0, so just return z */
  102. return z;
  103. case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
  104. SPDNORMX;
  105. fallthrough;
  106. case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
  107. if (zc == IEEE754_CLASS_INF)
  108. return ieee754sp_inf(zs);
  109. SPDNORMY;
  110. break;
  111. case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
  112. if (zc == IEEE754_CLASS_INF)
  113. return ieee754sp_inf(zs);
  114. SPDNORMX;
  115. break;
  116. case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
  117. if (zc == IEEE754_CLASS_INF)
  118. return ieee754sp_inf(zs);
  119. /* continue to real computations */
  120. }
  121. /* Finally get to do some computation */
  122. /*
  123. * Do the multiplication bit first
  124. *
  125. * rm = xm * ym, re = xe + ye basically
  126. *
  127. * At this point xm and ym should have been normalized.
  128. */
  129. /* rm = xm * ym, re = xe+ye basically */
  130. assert(xm & SP_HIDDEN_BIT);
  131. assert(ym & SP_HIDDEN_BIT);
  132. re = xe + ye;
  133. /* Multiple 24 bit xm and ym to give 48 bit results */
  134. rm64 = (uint64_t)xm * ym;
  135. /* Shunt to top of word */
  136. rm64 = rm64 << 16;
  137. /* Put explicit bit at bit 62 if necessary */
  138. if ((int64_t) rm64 < 0) {
  139. rm64 = rm64 >> 1;
  140. re++;
  141. }
  142. assert(rm64 & (1 << 62));
  143. if (zc == IEEE754_CLASS_ZERO) {
  144. /*
  145. * Move explicit bit from bit 62 to bit 26 since the
  146. * ieee754sp_format code expects the mantissa to be
  147. * 27 bits wide (24 + 3 rounding bits).
  148. */
  149. rm = XSPSRS64(rm64, (62 - 26));
  150. return ieee754sp_format(rs, re, rm);
  151. }
  152. /* Move explicit bit from bit 23 to bit 62 */
  153. zm64 = (uint64_t)zm << (62 - 23);
  154. assert(zm64 & (1 << 62));
  155. /* Make the exponents the same */
  156. if (ze > re) {
  157. /*
  158. * Have to shift r fraction right to align.
  159. */
  160. s = ze - re;
  161. rm64 = XSPSRS64(rm64, s);
  162. re += s;
  163. } else if (re > ze) {
  164. /*
  165. * Have to shift z fraction right to align.
  166. */
  167. s = re - ze;
  168. zm64 = XSPSRS64(zm64, s);
  169. ze += s;
  170. }
  171. assert(ze == re);
  172. assert(ze <= SP_EMAX);
  173. /* Do the addition */
  174. if (zs == rs) {
  175. /*
  176. * Generate 64 bit result by adding two 63 bit numbers
  177. * leaving result in zm64, zs and ze.
  178. */
  179. zm64 = zm64 + rm64;
  180. if ((int64_t)zm64 < 0) { /* carry out */
  181. zm64 = XSPSRS1(zm64);
  182. ze++;
  183. }
  184. } else {
  185. if (zm64 >= rm64) {
  186. zm64 = zm64 - rm64;
  187. } else {
  188. zm64 = rm64 - zm64;
  189. zs = rs;
  190. }
  191. if (zm64 == 0)
  192. return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
  193. /*
  194. * Put explicit bit at bit 62 if necessary.
  195. */
  196. while ((zm64 >> 62) == 0) {
  197. zm64 <<= 1;
  198. ze--;
  199. }
  200. }
  201. /*
  202. * Move explicit bit from bit 62 to bit 26 since the
  203. * ieee754sp_format code expects the mantissa to be
  204. * 27 bits wide (24 + 3 rounding bits).
  205. */
  206. zm = XSPSRS64(zm64, (62 - 26));
  207. return ieee754sp_format(zs, ze, zm);
  208. }
  209. union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
  210. union ieee754sp y)
  211. {
  212. return _sp_maddf(z, x, y, 0);
  213. }
  214. union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
  215. union ieee754sp y)
  216. {
  217. return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
  218. }
  219. union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
  220. union ieee754sp y)
  221. {
  222. return _sp_maddf(z, x, y, 0);
  223. }
  224. union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
  225. union ieee754sp y)
  226. {
  227. return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
  228. }
  229. union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
  230. union ieee754sp y)
  231. {
  232. return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
  233. }
  234. union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
  235. union ieee754sp y)
  236. {
  237. return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
  238. }