huf_decompress.c
/* ******************************************************************
 * huff0 huffman decoder,
 * part of Finite State Entropy library
 * Copyright (c) Yann Collet, Facebook, Inc.
 *
 * You can contact the author at :
 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* **************************************************************
*  Dependencies
****************************************************************/
#include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"  /* BIT_* */
#include "../common/fse.h"        /* to compress headers */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"


/* **************************************************************
*  Macros
****************************************************************/
/* These two optional macros force the use of one or the other of the two
 * Huffman decompression implementations. They cannot both be defined at the
 * same time.
 */
#if defined(HUF_FORCE_DECOMPRESS_X1) && \
    defined(HUF_FORCE_DECOMPRESS_X2)
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
#endif

/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError


/* **************************************************************
*  Byte alignment for workSpace management
****************************************************************/
#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
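/* For illustration (hypothetical values) : rounding x=13 up to an 8-byte
 * boundary computes HUF_ALIGN(13, 8) == (13 + 7) & ~7 == 16, while an
 * already-aligned x=16 stays at 16. */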


/* **************************************************************
*  BMI2 Variant Wrappers
****************************************************************/
#if DYNAMIC_BMI2

#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif
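/* Note : HUF_DGEN(fn) assumes that a matching fn##_body definition exists.
 * For example, HUF_DGEN(HUF_decompress4X1_usingDTable_internal) further below
 * wraps HUF_decompress4X1_usingDTable_internal_body behind a runtime bmi2
 * dispatch when DYNAMIC_BMI2 is enabled. */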

/*-***************************/
/*  generic DTableDesc       */
/*-***************************/
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;

static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
{
    DTableDesc dtd;
    ZSTD_memcpy(&dtd, table, sizeof(dtd));
    return dtd;
}


#ifndef HUF_FORCE_DECOMPRESS_X2

/*-***************************/
/*  single-symbol decoding   */
/*-***************************/
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */

/*
 * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
 * a time.
 */
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
    U64 D4;
    if (MEM_isLittleEndian()) {
        D4 = symbol + (nbBits << 8);
    } else {
        D4 = (symbol << 8) + nbBits;
    }
    D4 *= 0x0001000100010001ULL;
    return D4;
}
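/* Worked example (hypothetical values) : with symbol=0xAB and nbBits=5 on a
 * little-endian machine, each 16-bit lane holds 0x05AB, so
 * D4 == 0x05AB05AB05AB05ABULL. Writing it with MEM_write64 stores the byte
 * 0xAB followed by 0x05, four times over, i.e. four consecutive
 * {byte=0xAB, nbBits=5} HUF_DEltX1 entries. */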

typedef struct {
        U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
        U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
        U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
        BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
        BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
} HUF_ReadDTableX1_Workspace;

size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
}

size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;

    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
    if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzers complain ... */

    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
    if (HUF_isError(iSize)) return iSize;

    /* Table header */
    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;
        dtd.tableLog = (BYTE)tableLog;
        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Compute symbols and rankStart given rankVal:
     *
     * rankVal already contains the number of values of each weight.
     *
     * symbols contains the symbols ordered by weight. First are the rankVal[0]
     * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
     * symbols[0] is filled (but unused) to avoid a branch.
     *
     * rankStart contains the offset where each rank belongs in the DTable.
     * rankStart[0] is not filled because there are no entries in the table for
     * weight 0.
     */
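    /* Illustrative example (hypothetical counts) : with rankVal = {2, 3, 1},
     * i.e. two weight-0 symbols, three weight-1 symbols and one weight-2
     * symbol, the prefix sums give rankStart = {0, 2, 5}, so symbols[0..1]
     * receive the weight-0 symbols, symbols[2..4] the weight-1 symbols, and
     * symbols[5] the weight-2 symbol. */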
    {
        int n;
        int nextRankStart = 0;
        int const unroll = 4;
        int const nLimit = (int)nbSymbols - unroll + 1;
        for (n=0; n<(int)tableLog+1; n++) {
            U32 const curr = nextRankStart;
            nextRankStart += wksp->rankVal[n];
            wksp->rankStart[n] = curr;
        }
        for (n=0; n < nLimit; n += unroll) {
            int u;
            for (u=0; u < unroll; ++u) {
                size_t const w = wksp->huffWeight[n+u];
                wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
            }
        }
        for (; n < (int)nbSymbols; ++n) {
            size_t const w = wksp->huffWeight[n];
            wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
        }
    }

    /* fill DTable
     * We fill all entries of each weight in order.
     * That way length is a constant for each iteration of the outer loop.
     * We can switch based on the length to a different inner loop which is
     * optimized for that particular case.
     */
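    /* For intuition (hypothetical values) : with tableLog=11, a symbol of
     * weight w=4 decodes with nbBits = 11+1-4 = 8 bits and therefore occupies
     * length = (1<<4)>>1 = 8 consecutive DTable entries, while a weight-1
     * symbol uses the full 11 bits and occupies a single entry. */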
    {
        U32 w;
        int symbol=wksp->rankVal[0];
        int rankStart=0;
        for (w=1; w<tableLog+1; ++w) {
            int const symbolCount = wksp->rankVal[w];
            int const length = (1 << w) >> 1;
            int uStart = rankStart;
            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
            int s;
            int u;
            switch (length) {
            case 1:
                for (s=0; s<symbolCount; ++s) {
                    HUF_DEltX1 D;
                    D.byte = wksp->symbols[symbol + s];
                    D.nbBits = nbBits;
                    dt[uStart] = D;
                    uStart += 1;
                }
                break;
            case 2:
                for (s=0; s<symbolCount; ++s) {
                    HUF_DEltX1 D;
                    D.byte = wksp->symbols[symbol + s];
                    D.nbBits = nbBits;
                    dt[uStart+0] = D;
                    dt[uStart+1] = D;
                    uStart += 2;
                }
                break;
            case 4:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    MEM_write64(dt + uStart, D4);
                    uStart += 4;
                }
                break;
            case 8:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    MEM_write64(dt + uStart, D4);
                    MEM_write64(dt + uStart + 4, D4);
                    uStart += 8;
                }
                break;
            default:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    for (u=0; u < length; u += 16) {
                        MEM_write64(dt + uStart + u + 0, D4);
                        MEM_write64(dt + uStart + u + 4, D4);
                        MEM_write64(dt + uStart + u + 8, D4);
                        MEM_write64(dt + uStart + u + 12, D4);
                    }
                    assert(u == length);
                    uStart += length;
                }
                break;
            }
            symbol += symbolCount;
            rankStart += symbolCount * length;
        }
    }
    return iSize;
}

FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(Dstream, dtLog);    /* note : dtLog >= 1 */
    BYTE const c = dt[val].byte;
    BIT_skipBits(Dstream, dt[val].nbBits);
    return c;
}
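/* Decoding sketch (illustrative) : with dtLog=11, the decoder peeks the next
 * 11 bits of the stream and uses them directly as a DTable index; the entry
 * found there carries the decoded byte plus the true code length nbBits
 * (<= 11), and only nbBits bits are actually consumed. */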

#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

HINT_INLINE size_t
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 4 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
    }

    /* [0-3] symbols remaining */
    if (MEM_32bits())
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    /* no more data to retrieve from bitstream, no need to reload */
    while (p < pEnd)
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    return pEnd-pStart;
}

FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + dstSize;
    const void* dtPtr = DTable + 1;
    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
    BIT_DStream_t bitD;
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    U32 const dtLog = dtd.tableLog;

    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);

    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    return dstSize;
}

FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    /* Check */
    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        BYTE* const olimit = oend - 3;
        const void* const dtPtr = DTable + 1;
        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
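        /* Input layout sketch : a 6-byte jump table of three little-endian
         * 16-bit lengths, then the four concatenated substreams :
         *   | len1 | len2 | len3 | stream1 | stream2 | stream3 | stream4 |
         * length4 is not stored; it is deduced from cSrcSize. */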
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        const size_t segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;
        U32 endSignal = 1;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
        for ( ; (endSignal) & (op4 < olimit) ; ) {
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
        }

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);

        /* check */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}

typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const void *cSrc,
                                               size_t cSrcSize,
                                               const HUF_DTable *DTable);

HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)


size_t HUF_decompress1X1_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 0) return ERROR(GENERIC);
    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}

size_t HUF_decompress4X1_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 0) return ERROR(GENERIC);
    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
}
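/* Usage sketch (illustrative only; local buffer names are hypothetical and
 * error handling is elided). Assumes the HUF_CREATE_STATIC_DTABLEX1 helper
 * declared in huf.h under HUF_STATIC_LINKING_ONLY :
 *
 *     HUF_CREATE_STATIC_DTABLEX1(dtable, HUF_TABLELOG_MAX);
 *     BYTE wksp[HUF_DECOMPRESS_WORKSPACE_SIZE];
 *     size_t const dSize = HUF_decompress4X1_DCtx_wksp(dtable, dst, dstSize,
 *                                   cSrc, cSrcSize, wksp, sizeof(wksp));
 */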

#endif /* HUF_FORCE_DECOMPRESS_X2 */


#ifndef HUF_FORCE_DECOMPRESS_X1

/* *************************/
/* double-symbols decoding */
/* *************************/

typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];

/* HUF_fillDTableX2Level2() :
 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                           const U32* rankValOrigin, const int minWeight,
                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                           U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
{
    HUF_DEltX2 DElt;
    U32* rankVal = wksp;

    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
    (void)wkspSize;
    /* get pre-calculated rankVal */
    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));

    /* fill skipped values */
    if (minWeight>1) {
        U32 i, skipSize = rankVal[minWeight];
        MEM_writeLE16(&(DElt.sequence), baseSeq);
        DElt.nbBits   = (BYTE)(consumed);
        DElt.length   = 1;
        for (i = 0; i < skipSize; i++)
            DTable[i] = DElt;
    }

    /* fill DTable */
    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
            const U32 symbol = sortedSymbols[s].symbol;
            const U32 weight = sortedSymbols[s].weight;
            const U32 nbBits = nbBitsBaseline - weight;
            const U32 length = 1 << (sizeLog-nbBits);
            const U32 start = rankVal[weight];
            U32 i = start;
            const U32 end = start + length;

            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
            DElt.nbBits = (BYTE)(nbBits + consumed);
            DElt.length = 2;
            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */

            rankVal[weight] += length;
    }   }
}
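/* Two-level intuition (illustrative) : a level-2 sub-table hangs under a short
 * first symbol whose code already consumed `consumed` bits. Each of its
 * entries stores that first symbol in the low byte of `sequence` (baseSeq) and
 * a second symbol in the high byte, so a single lookup can emit two bytes at
 * once; nbBits then accounts for both codes combined. */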

static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
{
    U32* rankVal = wksp;
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits  = nbBitsBaseline - maxWeight;
    U32 s;

    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
    wksp += HUF_TABLELOG_MAX + 1;
    wkspSize -= HUF_TABLELOG_MAX + 1;

    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));

    /* fill DTable */
    for (s=0; s<sortedListSize; s++) {
        const U16 symbol = sortedList[s].symbol;
        const U32 weight = sortedList[s].weight;
        const U32 nbBits = nbBitsBaseline - weight;
        const U32 start = rankVal[weight];
        const U32 length = 1 << (targetLog-nbBits);

        if (targetLog-nbBits >= minBits) {  /* enough room for a second symbol */
            U32 sortedRank;
            int minWeight = nbBits + scaleLog;
            if (minWeight < 1) minWeight = 1;
            sortedRank = rankStart[minWeight];
            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                           rankValOrigin[nbBits], minWeight,
                           sortedList+sortedRank, sortedListSize-sortedRank,
                           nbBitsBaseline, symbol, wksp, wkspSize);
        } else {
            HUF_DEltX2 DElt;
            MEM_writeLE16(&(DElt.sequence), symbol);
            DElt.nbBits = (BYTE)(nbBits);
            DElt.length = 1;
            {   U32 const end = start + length;
                U32 u;
                for (u = start; u < end; u++) DTable[u] = DElt;
        }   }
        rankVal[weight] += length;
    }
}

typedef struct {
    rankValCol_t rankVal[HUF_TABLELOG_MAX];
    U32 rankStats[HUF_TABLELOG_MAX + 1];
    U32 rankStart0[HUF_TABLELOG_MAX + 2];
    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
} HUF_ReadDTableX2_Workspace;

size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
{
    U32 tableLog, maxW, sizeOfSort, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 const maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
    U32 *rankStart;

    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;

    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);

    rankStart = wksp->rankStart0 + 1;
    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzers complain ... */

    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */

    /* find maxWeight */
    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 curr = nextRankStart;
            nextRankStart += wksp->rankStats[w];
            rankStart[w] = curr;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list */
        sizeOfSort = nextRankStart;
    }

    /* sort symbols by weight */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = wksp->weightList[s];
            U32 const r = rankStart[w]++;
            wksp->sortedSymbol[r].symbol = (BYTE)s;
            wksp->sortedSymbol[r].weight = (BYTE)w;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal */
    {   U32* const rankVal0 = wksp->rankVal[0];
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 curr = nextRankVal;
                nextRankVal += wksp->rankStats[w] << (w+rescale);
                rankVal0[w] = curr;
        }   }
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = wksp->rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
    }   }   }   }

    HUF_fillDTableX2(dt, maxTableLog,
                   wksp->sortedSymbol, sizeOfSort,
                   wksp->rankStart0, wksp->rankVal, maxW,
                   tableLog+1,
                   wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));

    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}

FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    ZSTD_memcpy(op, dt+val, 2);
    BIT_skipBits(DStream, dt[val].nbBits);
    return dt[val].length;
}
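/* Illustration : a HUF_DEltX2 entry packs up to two decoded bytes into its
 * 16-bit `sequence` field. The decoder always copies 2 bytes to the output,
 * but advances the output pointer by `length` (1 or 2), so the second byte of
 * a 1-byte entry is simply overwritten by the next symbol. */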

FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    ZSTD_memcpy(op, dt+val, 1);
    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
    else {
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dt[val].nbBits);
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
    }   }
    return 1;
}
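/* Why the clamp above is safe (explanatory note) : this path only runs for the
 * very last output byte. When the entry is a 2-symbol sequence but only 1 byte
 * of room remains, nbBits covers both codes and may overshoot the bit
 * container; since no further symbol will be read, bitsConsumed is simply
 * saturated so that BIT_endOfDStream() still reports a clean end. */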

#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX2* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 8 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
    }

    /* closer to end : up to 2 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

    while (p <= pEnd-2)
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */

    if (p < pEnd)
        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);

    return p-pStart;
}

FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BIT_DStream_t bitD;

    /* Init */
    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    /* decode */
    {   BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
    }

    /* check */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    /* decoded size */
    return dstSize;
}

FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        BYTE* const olimit = oend - (sizeof(size_t)-1);
        const void* const dtPtr = DTable+1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = 1;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        for ( ; (endSignal) & (op4 < olimit); ) {
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
#else
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
            endSignal = (U32)LIKELY((U32)
                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
#endif
        }

        /* check corruption */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* check */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}

HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)

size_t HUF_decompress1X2_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 1) return ERROR(GENERIC);
    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
                                               workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}

size_t HUF_decompress4X2_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 1) return ERROR(GENERIC);
    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
                                         workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
}

#endif /* HUF_FORCE_DECOMPRESS_X1 */


/* ***********************************/
/* Universal decompression selectors */
/* ***********************************/

size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#else
    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#endif
}

size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#else
    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#endif
}


#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
{
    /* single, double, quad */
    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
};
#endif

/* HUF_selectDecoder() :
 *  Tells which decoder is likely to decode faster,
 *  based on a set of pre-computed metrics.
 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
 *  Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
{
    assert(dstSize > 0);
    assert(dstSize <= 128*1024);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dstSize;
    (void)cSrcSize;
    return 0;
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dstSize;
    (void)cSrcSize;
    return 1;
#else
    /* decoder timing evaluation */
    {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
        U32 const D256 = (U32)(dstSize >> 8);
        U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
        U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
        DTime1 += DTime1 >> 3;   /* advantage to algorithm using less memory, to reduce cache eviction */
        return DTime1 < DTime0;
    }
#endif
}
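/* Worked example (hypothetical sizes) : dstSize=65536 and cSrcSize=32768 give
 * Q = 32768*16/65536 = 8 and D256 = 256, so
 * DTime0 = 926 + 128*256 = 33694 and DTime1 = 1613 + 75*256 = 20813,
 * bumped by 1/8 to 23414; DTime1 < DTime0, so the X2 decoder is selected. */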

size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
                                     size_t dstSize, const void* cSrc,
                                     size_t cSrcSize, void* workSpace,
                                     size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#else
        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                            cSrcSize, workSpace, wkspSize):
                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#endif
    }
}

size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                  const void* cSrc, size_t cSrcSize,
                                  void* workSpace, size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize);
#else
        return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize):
                        HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize);
#endif
    }
}

size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#else
    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#endif
}

#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}
#endif

size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#else
    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#endif
}

size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#else
        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#endif
    }
}