dp_maddf.c 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * IEEE754 floating point arithmetic
  4. * double precision: MADDF.f (Fused Multiply Add)
  5. * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
  6. *
  7. * MIPS floating point support
  8. * Copyright (C) 2015 Imagination Technologies, Ltd.
  9. * Author: Markos Chandras <[email protected]>
  10. */
  11. #include "ieee754dp.h"
  12. /* 128 bits shift right logical with rounding. */
  13. static void srl128(u64 *hptr, u64 *lptr, int count)
  14. {
  15. u64 low;
  16. if (count >= 128) {
  17. *lptr = *hptr != 0 || *lptr != 0;
  18. *hptr = 0;
  19. } else if (count >= 64) {
  20. if (count == 64) {
  21. *lptr = *hptr | (*lptr != 0);
  22. } else {
  23. low = *lptr;
  24. *lptr = *hptr >> (count - 64);
  25. *lptr |= (*hptr << (128 - count)) != 0 || low != 0;
  26. }
  27. *hptr = 0;
  28. } else {
  29. low = *lptr;
  30. *lptr = low >> count | *hptr << (64 - count);
  31. *lptr |= (low << (64 - count)) != 0;
  32. *hptr = *hptr >> count;
  33. }
  34. }
  35. static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
  36. union ieee754dp y, enum maddf_flags flags)
  37. {
  38. int re;
  39. int rs;
  40. unsigned int lxm;
  41. unsigned int hxm;
  42. unsigned int lym;
  43. unsigned int hym;
  44. u64 lrm;
  45. u64 hrm;
  46. u64 lzm;
  47. u64 hzm;
  48. u64 t;
  49. u64 at;
  50. int s;
  51. COMPXDP;
  52. COMPYDP;
  53. COMPZDP;
  54. EXPLODEXDP;
  55. EXPLODEYDP;
  56. EXPLODEZDP;
  57. FLUSHXDP;
  58. FLUSHYDP;
  59. FLUSHZDP;
  60. ieee754_clearcx();
  61. rs = xs ^ ys;
  62. if (flags & MADDF_NEGATE_PRODUCT)
  63. rs ^= 1;
  64. if (flags & MADDF_NEGATE_ADDITION)
  65. zs ^= 1;
  66. /*
  67. * Handle the cases when at least one of x, y or z is a NaN.
  68. * Order of precedence is sNaN, qNaN and z, x, y.
  69. */
  70. if (zc == IEEE754_CLASS_SNAN)
  71. return ieee754dp_nanxcpt(z);
  72. if (xc == IEEE754_CLASS_SNAN)
  73. return ieee754dp_nanxcpt(x);
  74. if (yc == IEEE754_CLASS_SNAN)
  75. return ieee754dp_nanxcpt(y);
  76. if (zc == IEEE754_CLASS_QNAN)
  77. return z;
  78. if (xc == IEEE754_CLASS_QNAN)
  79. return x;
  80. if (yc == IEEE754_CLASS_QNAN)
  81. return y;
  82. if (zc == IEEE754_CLASS_DNORM)
  83. DPDNORMZ;
  84. /* ZERO z cases are handled separately below */
  85. switch (CLPAIR(xc, yc)) {
  86. /*
  87. * Infinity handling
  88. */
  89. case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
  90. case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
  91. ieee754_setcx(IEEE754_INVALID_OPERATION);
  92. return ieee754dp_indef();
  93. case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
  94. case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
  95. case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
  96. case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
  97. case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
  98. if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
  99. /*
  100. * Cases of addition of infinities with opposite signs
  101. * or subtraction of infinities with same signs.
  102. */
  103. ieee754_setcx(IEEE754_INVALID_OPERATION);
  104. return ieee754dp_indef();
  105. }
  106. /*
  107. * z is here either not an infinity, or an infinity having the
  108. * same sign as product (x*y). The result must be an infinity,
  109. * and its sign is determined only by the sign of product (x*y).
  110. */
  111. return ieee754dp_inf(rs);
  112. case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
  113. case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
  114. case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
  115. case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
  116. case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
  117. if (zc == IEEE754_CLASS_INF)
  118. return ieee754dp_inf(zs);
  119. if (zc == IEEE754_CLASS_ZERO) {
  120. /* Handle cases +0 + (-0) and similar ones. */
  121. if (zs == rs)
  122. /*
  123. * Cases of addition of zeros of equal signs
  124. * or subtraction of zeroes of opposite signs.
  125. * The sign of the resulting zero is in any
  126. * such case determined only by the sign of z.
  127. */
  128. return z;
  129. return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
  130. }
  131. /* x*y is here 0, and z is not 0, so just return z */
  132. return z;
  133. case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
  134. DPDNORMX;
  135. fallthrough;
  136. case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
  137. if (zc == IEEE754_CLASS_INF)
  138. return ieee754dp_inf(zs);
  139. DPDNORMY;
  140. break;
  141. case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
  142. if (zc == IEEE754_CLASS_INF)
  143. return ieee754dp_inf(zs);
  144. DPDNORMX;
  145. break;
  146. case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
  147. if (zc == IEEE754_CLASS_INF)
  148. return ieee754dp_inf(zs);
  149. /* continue to real computations */
  150. }
  151. /* Finally get to do some computation */
  152. /*
  153. * Do the multiplication bit first
  154. *
  155. * rm = xm * ym, re = xe + ye basically
  156. *
  157. * At this point xm and ym should have been normalized.
  158. */
  159. assert(xm & DP_HIDDEN_BIT);
  160. assert(ym & DP_HIDDEN_BIT);
  161. re = xe + ye;
  162. /* shunt to top of word */
  163. xm <<= 64 - (DP_FBITS + 1);
  164. ym <<= 64 - (DP_FBITS + 1);
  165. /*
  166. * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
  167. */
  168. lxm = xm;
  169. hxm = xm >> 32;
  170. lym = ym;
  171. hym = ym >> 32;
  172. lrm = DPXMULT(lxm, lym);
  173. hrm = DPXMULT(hxm, hym);
  174. t = DPXMULT(lxm, hym);
  175. at = lrm + (t << 32);
  176. hrm += at < lrm;
  177. lrm = at;
  178. hrm = hrm + (t >> 32);
  179. t = DPXMULT(hxm, lym);
  180. at = lrm + (t << 32);
  181. hrm += at < lrm;
  182. lrm = at;
  183. hrm = hrm + (t >> 32);
  184. /* Put explicit bit at bit 126 if necessary */
  185. if ((int64_t)hrm < 0) {
  186. lrm = (hrm << 63) | (lrm >> 1);
  187. hrm = hrm >> 1;
  188. re++;
  189. }
  190. assert(hrm & (1 << 62));
  191. if (zc == IEEE754_CLASS_ZERO) {
  192. /*
  193. * Move explicit bit from bit 126 to bit 55 since the
  194. * ieee754dp_format code expects the mantissa to be
  195. * 56 bits wide (53 + 3 rounding bits).
  196. */
  197. srl128(&hrm, &lrm, (126 - 55));
  198. return ieee754dp_format(rs, re, lrm);
  199. }
  200. /* Move explicit bit from bit 52 to bit 126 */
  201. lzm = 0;
  202. hzm = zm << 10;
  203. assert(hzm & (1 << 62));
  204. /* Make the exponents the same */
  205. if (ze > re) {
  206. /*
  207. * Have to shift y fraction right to align.
  208. */
  209. s = ze - re;
  210. srl128(&hrm, &lrm, s);
  211. re += s;
  212. } else if (re > ze) {
  213. /*
  214. * Have to shift x fraction right to align.
  215. */
  216. s = re - ze;
  217. srl128(&hzm, &lzm, s);
  218. ze += s;
  219. }
  220. assert(ze == re);
  221. assert(ze <= DP_EMAX);
  222. /* Do the addition */
  223. if (zs == rs) {
  224. /*
  225. * Generate 128 bit result by adding two 127 bit numbers
  226. * leaving result in hzm:lzm, zs and ze.
  227. */
  228. hzm = hzm + hrm + (lzm > (lzm + lrm));
  229. lzm = lzm + lrm;
  230. if ((int64_t)hzm < 0) { /* carry out */
  231. srl128(&hzm, &lzm, 1);
  232. ze++;
  233. }
  234. } else {
  235. if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
  236. hzm = hzm - hrm - (lzm < lrm);
  237. lzm = lzm - lrm;
  238. } else {
  239. hzm = hrm - hzm - (lrm < lzm);
  240. lzm = lrm - lzm;
  241. zs = rs;
  242. }
  243. if (lzm == 0 && hzm == 0)
  244. return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
  245. /*
  246. * Put explicit bit at bit 126 if necessary.
  247. */
  248. if (hzm == 0) {
  249. /* left shift by 63 or 64 bits */
  250. if ((int64_t)lzm < 0) {
  251. /* MSB of lzm is the explicit bit */
  252. hzm = lzm >> 1;
  253. lzm = lzm << 63;
  254. ze -= 63;
  255. } else {
  256. hzm = lzm;
  257. lzm = 0;
  258. ze -= 64;
  259. }
  260. }
  261. t = 0;
  262. while ((hzm >> (62 - t)) == 0)
  263. t++;
  264. assert(t <= 62);
  265. if (t) {
  266. hzm = hzm << t | lzm >> (64 - t);
  267. lzm = lzm << t;
  268. ze -= t;
  269. }
  270. }
  271. /*
  272. * Move explicit bit from bit 126 to bit 55 since the
  273. * ieee754dp_format code expects the mantissa to be
  274. * 56 bits wide (53 + 3 rounding bits).
  275. */
  276. srl128(&hzm, &lzm, (126 - 55));
  277. return ieee754dp_format(zs, ze, lzm);
  278. }
  279. union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
  280. union ieee754dp y)
  281. {
  282. return _dp_maddf(z, x, y, 0);
  283. }
  284. union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
  285. union ieee754dp y)
  286. {
  287. return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
  288. }
  289. union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x,
  290. union ieee754dp y)
  291. {
  292. return _dp_maddf(z, x, y, 0);
  293. }
  294. union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x,
  295. union ieee754dp y)
  296. {
  297. return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
  298. }
  299. union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x,
  300. union ieee754dp y)
  301. {
  302. return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
  303. }
  304. union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x,
  305. union ieee754dp y)
  306. {
  307. return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
  308. }