dfmpy.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Linux/PA-RISC Project (http://www.parisc-linux.org/)
  4. *
  5. * Floating-point emulation code
  6. * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <[email protected]>
  7. */
  8. /*
  9. * BEGIN_DESC
  10. *
  11. * File:
  12. * @(#) pa/spmath/dfmpy.c $Revision: 1.1 $
  13. *
  14. * Purpose:
  15. * Double Precision Floating-point Multiply
  16. *
  17. * External Interfaces:
  18. * dbl_fmpy(srcptr1,srcptr2,dstptr,status)
  19. *
  20. * Internal Interfaces:
  21. *
  22. * Theory:
  23. * <<please update with a overview of the operation of this file>>
  24. *
  25. * END_DESC
  26. */
  27. #include "float.h"
  28. #include "dbl_float.h"
  29. /*
  30. * Double Precision Floating-point Multiply
  31. */
  32. int
  33. dbl_fmpy(
  34. dbl_floating_point *srcptr1,
  35. dbl_floating_point *srcptr2,
  36. dbl_floating_point *dstptr,
  37. unsigned int *status)
  38. {
  39. register unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2;
  40. register unsigned int opnd3p1, opnd3p2, resultp1, resultp2;
  41. register int dest_exponent, count;
  42. register boolean inexact = FALSE, guardbit = FALSE, stickybit = FALSE;
  43. boolean is_tiny;
  44. Dbl_copyfromptr(srcptr1,opnd1p1,opnd1p2);
  45. Dbl_copyfromptr(srcptr2,opnd2p1,opnd2p2);
  46. /*
  47. * set sign bit of result
  48. */
  49. if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
  50. Dbl_setnegativezerop1(resultp1);
  51. else Dbl_setzerop1(resultp1);
  52. /*
  53. * check first operand for NaN's or infinity
  54. */
  55. if (Dbl_isinfinity_exponent(opnd1p1)) {
  56. if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
  57. if (Dbl_isnotnan(opnd2p1,opnd2p2)) {
  58. if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
  59. /*
  60. * invalid since operands are infinity
  61. * and zero
  62. */
  63. if (Is_invalidtrap_enabled())
  64. return(INVALIDEXCEPTION);
  65. Set_invalidflag();
  66. Dbl_makequietnan(resultp1,resultp2);
  67. Dbl_copytoptr(resultp1,resultp2,dstptr);
  68. return(NOEXCEPTION);
  69. }
  70. /*
  71. * return infinity
  72. */
  73. Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
  74. Dbl_copytoptr(resultp1,resultp2,dstptr);
  75. return(NOEXCEPTION);
  76. }
  77. }
  78. else {
  79. /*
  80. * is NaN; signaling or quiet?
  81. */
  82. if (Dbl_isone_signaling(opnd1p1)) {
  83. /* trap if INVALIDTRAP enabled */
  84. if (Is_invalidtrap_enabled())
  85. return(INVALIDEXCEPTION);
  86. /* make NaN quiet */
  87. Set_invalidflag();
  88. Dbl_set_quiet(opnd1p1);
  89. }
  90. /*
  91. * is second operand a signaling NaN?
  92. */
  93. else if (Dbl_is_signalingnan(opnd2p1)) {
  94. /* trap if INVALIDTRAP enabled */
  95. if (Is_invalidtrap_enabled())
  96. return(INVALIDEXCEPTION);
  97. /* make NaN quiet */
  98. Set_invalidflag();
  99. Dbl_set_quiet(opnd2p1);
  100. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  101. return(NOEXCEPTION);
  102. }
  103. /*
  104. * return quiet NaN
  105. */
  106. Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
  107. return(NOEXCEPTION);
  108. }
  109. }
  110. /*
  111. * check second operand for NaN's or infinity
  112. */
  113. if (Dbl_isinfinity_exponent(opnd2p1)) {
  114. if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
  115. if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
  116. /* invalid since operands are zero & infinity */
  117. if (Is_invalidtrap_enabled())
  118. return(INVALIDEXCEPTION);
  119. Set_invalidflag();
  120. Dbl_makequietnan(opnd2p1,opnd2p2);
  121. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  122. return(NOEXCEPTION);
  123. }
  124. /*
  125. * return infinity
  126. */
  127. Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
  128. Dbl_copytoptr(resultp1,resultp2,dstptr);
  129. return(NOEXCEPTION);
  130. }
  131. /*
  132. * is NaN; signaling or quiet?
  133. */
  134. if (Dbl_isone_signaling(opnd2p1)) {
  135. /* trap if INVALIDTRAP enabled */
  136. if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
  137. /* make NaN quiet */
  138. Set_invalidflag();
  139. Dbl_set_quiet(opnd2p1);
  140. }
  141. /*
  142. * return quiet NaN
  143. */
  144. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  145. return(NOEXCEPTION);
  146. }
  147. /*
  148. * Generate exponent
  149. */
  150. dest_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) -DBL_BIAS;
  151. /*
  152. * Generate mantissa
  153. */
  154. if (Dbl_isnotzero_exponent(opnd1p1)) {
  155. /* set hidden bit */
  156. Dbl_clear_signexponent_set_hidden(opnd1p1);
  157. }
  158. else {
  159. /* check for zero */
  160. if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
  161. Dbl_setzero_exponentmantissa(resultp1,resultp2);
  162. Dbl_copytoptr(resultp1,resultp2,dstptr);
  163. return(NOEXCEPTION);
  164. }
  165. /* is denormalized, adjust exponent */
  166. Dbl_clear_signexponent(opnd1p1);
  167. Dbl_leftshiftby1(opnd1p1,opnd1p2);
  168. Dbl_normalize(opnd1p1,opnd1p2,dest_exponent);
  169. }
  170. /* opnd2 needs to have hidden bit set with msb in hidden bit */
  171. if (Dbl_isnotzero_exponent(opnd2p1)) {
  172. Dbl_clear_signexponent_set_hidden(opnd2p1);
  173. }
  174. else {
  175. /* check for zero */
  176. if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
  177. Dbl_setzero_exponentmantissa(resultp1,resultp2);
  178. Dbl_copytoptr(resultp1,resultp2,dstptr);
  179. return(NOEXCEPTION);
  180. }
  181. /* is denormalized; want to normalize */
  182. Dbl_clear_signexponent(opnd2p1);
  183. Dbl_leftshiftby1(opnd2p1,opnd2p2);
  184. Dbl_normalize(opnd2p1,opnd2p2,dest_exponent);
  185. }
  186. /* Multiply two source mantissas together */
  187. /* make room for guard bits */
  188. Dbl_leftshiftby7(opnd2p1,opnd2p2);
  189. Dbl_setzero(opnd3p1,opnd3p2);
  190. /*
  191. * Four bits at a time are inspected in each loop, and a
  192. * simple shift and add multiply algorithm is used.
  193. */
  194. for (count=1;count<=DBL_P;count+=4) {
  195. stickybit |= Dlow4p2(opnd3p2);
  196. Dbl_rightshiftby4(opnd3p1,opnd3p2);
  197. if (Dbit28p2(opnd1p2)) {
  198. /* Twoword_add should be an ADDC followed by an ADD. */
  199. Twoword_add(opnd3p1, opnd3p2, opnd2p1<<3 | opnd2p2>>29,
  200. opnd2p2<<3);
  201. }
  202. if (Dbit29p2(opnd1p2)) {
  203. Twoword_add(opnd3p1, opnd3p2, opnd2p1<<2 | opnd2p2>>30,
  204. opnd2p2<<2);
  205. }
  206. if (Dbit30p2(opnd1p2)) {
  207. Twoword_add(opnd3p1, opnd3p2, opnd2p1<<1 | opnd2p2>>31,
  208. opnd2p2<<1);
  209. }
  210. if (Dbit31p2(opnd1p2)) {
  211. Twoword_add(opnd3p1, opnd3p2, opnd2p1, opnd2p2);
  212. }
  213. Dbl_rightshiftby4(opnd1p1,opnd1p2);
  214. }
  215. if (Dbit3p1(opnd3p1)==0) {
  216. Dbl_leftshiftby1(opnd3p1,opnd3p2);
  217. }
  218. else {
  219. /* result mantissa >= 2. */
  220. dest_exponent++;
  221. }
  222. /* check for denormalized result */
  223. while (Dbit3p1(opnd3p1)==0) {
  224. Dbl_leftshiftby1(opnd3p1,opnd3p2);
  225. dest_exponent--;
  226. }
  227. /*
  228. * check for guard, sticky and inexact bits
  229. */
  230. stickybit |= Dallp2(opnd3p2) << 25;
  231. guardbit = (Dallp2(opnd3p2) << 24) >> 31;
  232. inexact = guardbit | stickybit;
  233. /* align result mantissa */
  234. Dbl_rightshiftby8(opnd3p1,opnd3p2);
  235. /*
  236. * round result
  237. */
  238. if (inexact && (dest_exponent>0 || Is_underflowtrap_enabled())) {
  239. Dbl_clear_signexponent(opnd3p1);
  240. switch (Rounding_mode()) {
  241. case ROUNDPLUS:
  242. if (Dbl_iszero_sign(resultp1))
  243. Dbl_increment(opnd3p1,opnd3p2);
  244. break;
  245. case ROUNDMINUS:
  246. if (Dbl_isone_sign(resultp1))
  247. Dbl_increment(opnd3p1,opnd3p2);
  248. break;
  249. case ROUNDNEAREST:
  250. if (guardbit) {
  251. if (stickybit || Dbl_isone_lowmantissap2(opnd3p2))
  252. Dbl_increment(opnd3p1,opnd3p2);
  253. }
  254. }
  255. if (Dbl_isone_hidden(opnd3p1)) dest_exponent++;
  256. }
  257. Dbl_set_mantissa(resultp1,resultp2,opnd3p1,opnd3p2);
  258. /*
  259. * Test for overflow
  260. */
  261. if (dest_exponent >= DBL_INFINITY_EXPONENT) {
  262. /* trap if OVERFLOWTRAP enabled */
  263. if (Is_overflowtrap_enabled()) {
  264. /*
  265. * Adjust bias of result
  266. */
  267. Dbl_setwrapped_exponent(resultp1,dest_exponent,ovfl);
  268. Dbl_copytoptr(resultp1,resultp2,dstptr);
  269. if (inexact)
  270. if (Is_inexacttrap_enabled())
  271. return (OVERFLOWEXCEPTION | INEXACTEXCEPTION);
  272. else Set_inexactflag();
  273. return (OVERFLOWEXCEPTION);
  274. }
  275. inexact = TRUE;
  276. Set_overflowflag();
  277. /* set result to infinity or largest number */
  278. Dbl_setoverflow(resultp1,resultp2);
  279. }
  280. /*
  281. * Test for underflow
  282. */
  283. else if (dest_exponent <= 0) {
  284. /* trap if UNDERFLOWTRAP enabled */
  285. if (Is_underflowtrap_enabled()) {
  286. /*
  287. * Adjust bias of result
  288. */
  289. Dbl_setwrapped_exponent(resultp1,dest_exponent,unfl);
  290. Dbl_copytoptr(resultp1,resultp2,dstptr);
  291. if (inexact)
  292. if (Is_inexacttrap_enabled())
  293. return (UNDERFLOWEXCEPTION | INEXACTEXCEPTION);
  294. else Set_inexactflag();
  295. return (UNDERFLOWEXCEPTION);
  296. }
  297. /* Determine if should set underflow flag */
  298. is_tiny = TRUE;
  299. if (dest_exponent == 0 && inexact) {
  300. switch (Rounding_mode()) {
  301. case ROUNDPLUS:
  302. if (Dbl_iszero_sign(resultp1)) {
  303. Dbl_increment(opnd3p1,opnd3p2);
  304. if (Dbl_isone_hiddenoverflow(opnd3p1))
  305. is_tiny = FALSE;
  306. Dbl_decrement(opnd3p1,opnd3p2);
  307. }
  308. break;
  309. case ROUNDMINUS:
  310. if (Dbl_isone_sign(resultp1)) {
  311. Dbl_increment(opnd3p1,opnd3p2);
  312. if (Dbl_isone_hiddenoverflow(opnd3p1))
  313. is_tiny = FALSE;
  314. Dbl_decrement(opnd3p1,opnd3p2);
  315. }
  316. break;
  317. case ROUNDNEAREST:
  318. if (guardbit && (stickybit ||
  319. Dbl_isone_lowmantissap2(opnd3p2))) {
  320. Dbl_increment(opnd3p1,opnd3p2);
  321. if (Dbl_isone_hiddenoverflow(opnd3p1))
  322. is_tiny = FALSE;
  323. Dbl_decrement(opnd3p1,opnd3p2);
  324. }
  325. break;
  326. }
  327. }
  328. /*
  329. * denormalize result or set to signed zero
  330. */
  331. stickybit = inexact;
  332. Dbl_denormalize(opnd3p1,opnd3p2,dest_exponent,guardbit,
  333. stickybit,inexact);
  334. /* return zero or smallest number */
  335. if (inexact) {
  336. switch (Rounding_mode()) {
  337. case ROUNDPLUS:
  338. if (Dbl_iszero_sign(resultp1)) {
  339. Dbl_increment(opnd3p1,opnd3p2);
  340. }
  341. break;
  342. case ROUNDMINUS:
  343. if (Dbl_isone_sign(resultp1)) {
  344. Dbl_increment(opnd3p1,opnd3p2);
  345. }
  346. break;
  347. case ROUNDNEAREST:
  348. if (guardbit && (stickybit ||
  349. Dbl_isone_lowmantissap2(opnd3p2))) {
  350. Dbl_increment(opnd3p1,opnd3p2);
  351. }
  352. break;
  353. }
  354. if (is_tiny) Set_underflowflag();
  355. }
  356. Dbl_set_exponentmantissa(resultp1,resultp2,opnd3p1,opnd3p2);
  357. }
  358. else Dbl_set_exponent(resultp1,dest_exponent);
  359. /* check for inexact */
  360. Dbl_copytoptr(resultp1,resultp2,dstptr);
  361. if (inexact) {
  362. if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION);
  363. else Set_inexactflag();
  364. }
  365. return(NOEXCEPTION);
  366. }