/* zstd_lazy.c */

/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_lazy.h"


/*-*************************************
*  Binary Tree search
***************************************/
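
/* ZSTD_updateDUBT() :
 * Inserts every position from ms->nextToUpdate up to ip (excluded) into the table
 * as an *unsorted* candidate : each new position becomes the head of its hash bucket,
 * is chained to the previous head, and is tagged with ZSTD_DUBT_UNSORTED_MARK.
 * Sorting into the binary tree is deferred to ZSTD_insertDUBT1(), at search time. */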
static void
ZSTD_updateDUBT(ZSTD_matchState_t* ms,
                const BYTE* ip, const BYTE* iend,
                U32 mls)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32  const hashLog = cParams->hashLog;

    U32* const bt = ms->chainTable;
    U32  const btLog  = cParams->chainLog - 1;
    U32  const btMask = (1 << btLog) - 1;

    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;

    if (idx != target)
        DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
                    idx, target, ms->window.dictLimit);
    assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
    (void)iend;

    assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
    for ( ; idx < target ; idx++) {
        size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
        U32    const matchIndex = hashTable[h];

        U32* const nextCandidatePtr = bt + 2*(idx&btMask);
        U32* const sortMarkPtr = nextCandidatePtr + 1;

        DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
        hashTable[h] = idx;   /* Update Hash Table */
        *nextCandidatePtr = matchIndex;   /* update BT like a chain */
        *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
    }
    ms->nextToUpdate = target;
}

/* ZSTD_insertDUBT1() :
 *  sort one already inserted but unsorted position
 *  assumption : curr >= btLow == (curr - btMask)
 *  doesn't fail */
static void
ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
                 U32 curr, const BYTE* inputEnd,
                 U32 nbCompares, U32 btLow,
                 const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const bt = ms->chainTable;
    U32  const btLog  = cParams->chainLog - 1;
    U32  const btMask = (1 << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
    const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
    U32 dummy32;   /* to be nullified at the end */
    U32 const windowValid = ms->window.lowLimit;
    U32 const maxDistance = 1U << cParams->windowLog;
    U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;

    DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
                curr, dictLimit, windowLow);
    assert(curr >= btLow);
    assert(ip < iend);   /* condition for ZSTD_count */

    for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < curr);
        /* note : all candidates are now supposed sorted,
         * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
         * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */

        if ( (dictMode != ZSTD_extDict)
          || (matchIndex+matchLength >= dictLimit)   /* both in current segment */
          || (curr < dictLimit) /* both in extDict */) {
            const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
                                     || (matchIndex+matchLength >= dictLimit)) ?
                                        base : dictBase;
            assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
                 || (curr < dictLimit) );
            match = mBase + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* preparation for next read of match[matchLength] */
        }

        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
                    curr, matchIndex, (U32)matchLength);

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop, to guarantee consistency; misses a bit of compression, but other solutions can corrupt the tree */
        }

        if (match[matchLength] < ip[matchLength]) {   /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
                        matchIndex, btLow, nextPtr[1]);
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
                        matchIndex, btLow, nextPtr[0]);
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
}
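
/* ZSTD_DUBT_findBetterDictMatch() :
 * Walks the binary tree of the attached dictMatchState (dms), read-only, looking for
 * a match longer than the current bestLength. A candidate replaces the current best
 * only when its extra length outweighs its offset cost (same heuristic as the main
 * tree search). Returns the (possibly unchanged) bestLength. */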
static size_t
ZSTD_DUBT_findBetterDictMatch (
        ZSTD_matchState_t* ms,
        const BYTE* const ip, const BYTE* const iend,
        size_t* offsetPtr,
        size_t bestLength,
        U32 nbCompares,
        U32 const mls,
        const ZSTD_dictMode_e dictMode)
{
    const ZSTD_matchState_t * const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
    const U32 * const dictHashTable = dms->hashTable;
    U32         const hashLog = dmsCParams->hashLog;
    size_t      const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32               dictMatchIndex = dictHashTable[h];

    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    U32         const curr = (U32)(ip-base);
    const BYTE* const dictBase = dms->window.base;
    const BYTE* const dictEnd = dms->window.nextSrc;
    U32         const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
    U32         const dictLowLimit = dms->window.lowLimit;
    U32         const dictIndexDelta = ms->window.lowLimit - dictHighLimit;

    U32*        const dictBt = dms->chainTable;
    U32         const btLog  = dmsCParams->chainLog - 1;
    U32         const btMask = (1 << btLog) - 1;
    U32         const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;

    size_t commonLengthSmaller=0, commonLengthLarger=0;

    (void)dictMode;
    assert(dictMode == ZSTD_dictMatchState);

    for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) {
        U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match = dictBase + dictMatchIndex;
        matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
        if (dictMatchIndex+matchLength >= dictHighLimit)
            match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */

        if (matchLength > bestLength) {
            U32 matchIndex = dictMatchIndex + dictIndexDelta;
            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
                DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
            }
            if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }
        }

        if (match[matchLength] < ip[matchLength]) {
            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
            commonLengthSmaller = matchLength;   /* all smaller will now have at least this guaranteed common length */
            dictMatchIndex = nextPtr[1];         /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
            commonLengthLarger = matchLength;
            dictMatchIndex = nextPtr[0];
        }
    }

    if (bestLength >= MINMATCH) {
        U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
        DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                    curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
    }
    return bestLength;
}
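
/* ZSTD_DUBT_findBestMatch() :
 * Search proceeds in three stages :
 * 1) walk the chain of still-unsorted candidates hanging from the hash bucket,
 *    reversing it through `previousCandidate`,
 * 2) batch-sort those stacked candidates into the tree with ZSTD_insertDUBT1(),
 * 3) descend the (now sorted) tree to find the longest match, optionally extending
 *    the search into the dictMatchState via ZSTD_DUBT_findBetterDictMatch(). */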
static size_t
ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
                        const BYTE* const ip, const BYTE* const iend,
                        size_t* offsetPtr,
                        U32 const mls,
                        const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32          matchIndex  = hashTable[h];

    const BYTE* const base = ms->window.base;
    U32    const curr = (U32)(ip-base);
    U32    const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);

    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32    const btLow = (btMask >= curr) ? 0 : curr - btMask;
    U32    const unsortLimit = MAX(btLow, windowLow);

    U32*         nextCandidate = bt + 2*(matchIndex&btMask);
    U32*         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
    U32          nbCompares = 1U << cParams->searchLog;
    U32          nbCandidates = nbCompares;
    U32          previousCandidate = 0;

    DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
    assert(ip <= iend-8);   /* required for h calculation */
    assert(dictMode != ZSTD_dedicatedDictSearch);

    /* reach end of unsorted candidates list */
    while ( (matchIndex > unsortLimit)
         && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
         && (nbCandidates > 1) ) {
        DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
                    matchIndex);
        *unsortedMark = previousCandidate;   /* the unsortedMark becomes a reversed chain, to move up back to original position */
        previousCandidate = matchIndex;
        matchIndex = *nextCandidate;
        nextCandidate = bt + 2*(matchIndex&btMask);
        unsortedMark = bt + 2*(matchIndex&btMask) + 1;
        nbCandidates --;
    }

    /* nullify last candidate if it's still unsorted
     * simplification, detrimental to compression ratio, beneficial for speed */
    if ( (matchIndex > unsortLimit)
      && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
        DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
                    matchIndex);
        *nextCandidate = *unsortedMark = 0;
    }

    /* batch sort stacked candidates */
    matchIndex = previousCandidate;
    while (matchIndex) {   /* will end on matchIndex == 0 */
        U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
        U32 const nextCandidateIdx = *nextCandidateIdxPtr;
        ZSTD_insertDUBT1(ms, matchIndex, iend,
                         nbCandidates, unsortLimit, dictMode);
        matchIndex = nextCandidateIdx;
        nbCandidates++;
    }

    /* find longest match */
    {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
        const BYTE* const dictBase = ms->window.dictBase;
        const U32 dictLimit = ms->window.dictLimit;
        const BYTE* const dictEnd = dictBase + dictLimit;
        const BYTE* const prefixStart = base + dictLimit;
        U32* smallerPtr = bt + 2*(curr&btMask);
        U32* largerPtr  = bt + 2*(curr&btMask) + 1;
        U32 matchEndIdx = curr + 8 + 1;
        U32 dummy32;   /* to be nullified at the end */
        size_t bestLength = 0;

        matchIndex = hashTable[h];
        hashTable[h] = curr;   /* Update Hash Table */

        for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
            U32* const nextPtr = bt + 2*(matchIndex & btMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match;

            if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
                match = base + matchIndex;
                matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
            } else {
                match = dictBase + matchIndex;
                matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
                if (matchIndex+matchLength >= dictLimit)
                    match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
            }

            if (matchLength > bestLength) {
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
                    bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
                if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
                    if (dictMode == ZSTD_dictMatchState) {
                        nbCompares = 0; /* in addition to avoiding checking any
                                         * further in this loop, make sure we
                                         * skip checking in the dictionary. */
                    }
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
                }
            }

            if (match[matchLength] < ip[matchLength]) {
                /* match is smaller than current */
                *smallerPtr = matchIndex;             /* update smaller idx */
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
                if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
                smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
                matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                *largerPtr = matchIndex;
                commonLengthLarger = matchLength;
                if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
                largerPtr = nextPtr;
                matchIndex = nextPtr[0];
        }   }

        *smallerPtr = *largerPtr = 0;

        assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
        if (dictMode == ZSTD_dictMatchState && nbCompares) {
            bestLength = ZSTD_DUBT_findBetterDictMatch(
                    ms, ip, iend,
                    offsetPtr, bestLength, nbCompares,
                    mls, dictMode);
        }

        assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
        ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
        if (bestLength >= MINMATCH) {
            U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
            DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                        curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
        }
        return bestLength;
    }
}

/* ZSTD_BtFindBestMatch() : Tree updater, providing best match */
FORCE_INLINE_TEMPLATE size_t
ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iLimit,
                      size_t* offsetPtr,
                const U32 mls /* template */,
                const ZSTD_dictMode_e dictMode)
{
    DEBUGLOG(7, "ZSTD_BtFindBestMatch");
    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateDUBT(ms, ip, iLimit, mls);
    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
}

static size_t
ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
                           const BYTE* ip, const BYTE* const iLimit,
                                 size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
    }
}

static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
    }
}

static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
    }
}

/* *********************************
*  Hash Chain
***********************************/
#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]

/* Update chains up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
                        ZSTD_matchState_t* ms,
                        const ZSTD_compressionParameters* const cParams,
                        const BYTE* ip, U32 const mls)
{
    U32* const hashTable  = ms->hashTable;
    const U32 hashLog = cParams->hashLog;
    U32* const chainTable = ms->chainTable;
    const U32 chainMask = (1 << cParams->chainLog) - 1;
    const BYTE* const base = ms->window.base;
    const U32 target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;

    while(idx < target) { /* catch up */
        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
        hashTable[h] = idx;
        idx++;
    }

    ms->nextToUpdate = target;
    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
}

U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
}
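
/* ZSTD_dedicatedDictSearch_lazy_loadDictionary() :
 * Builds the dedicated-dictionary-search (DDSS) layout. Each hash bucket is
 * (1 << ZSTD_LAZY_DDSS_BUCKET_LOG) entries wide : the first (bucketSize-1) slots cache
 * the most recent positions for that hash, and the last slot stores a packed pointer
 * (chain start << 8 | chain length) into a chain table holding older positions.
 * A temporary chain table is first built inside the oversized hash table, then
 * compacted into the regular chain table. */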
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32* const hashTable = ms->hashTable;
    U32* const chainTable = ms->chainTable;
    U32 const chainSize = 1 << ms->cParams.chainLog;
    U32 idx = ms->nextToUpdate;
    U32 const minChain = chainSize < target ? target - chainSize : idx;
    U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
    U32 const cacheSize = bucketSize - 1;
    U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
    U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;

    /* We know the hashtable is oversized by a factor of `bucketSize`.
     * We are going to temporarily pretend `bucketSize == 1`, keeping only a
     * single entry. We will use the rest of the space to construct a temporary
     * chaintable.
     */
    U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
    U32* const tmpHashTable = hashTable;
    U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
    U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
    U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;

    U32 hashIdx;

    assert(ms->cParams.chainLog <= 24);
    assert(ms->cParams.hashLog >= ms->cParams.chainLog);
    assert(idx != 0);
    assert(tmpMinChain <= minChain);

    /* fill conventional hash table and conventional chain table */
    for ( ; idx < target; idx++) {
        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
        if (idx >= tmpMinChain) {
            tmpChainTable[idx - tmpMinChain] = hashTable[h];
        }
        tmpHashTable[h] = idx;
    }

    /* sort chains into ddss chain table */
    {
        U32 chainPos = 0;
        for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
            U32 count;
            U32 countBeyondMinChain = 0;
            U32 i = tmpHashTable[hashIdx];
            for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
                /* skip through the chain to the first position that won't be
                 * in the hash cache bucket */
                if (i < minChain) {
                    countBeyondMinChain++;
                }
                i = tmpChainTable[i - tmpMinChain];
            }
            if (count == cacheSize) {
                for (count = 0; count < chainLimit;) {
                    if (i < minChain) {
                        if (!i || countBeyondMinChain++ > cacheSize) {
                            /* only allow pulling `cacheSize` number of entries
                             * into the cache or chainTable beyond `minChain`,
                             * to replace the entries pulled out of the
                             * chainTable into the cache. This lets us reach
                             * back further without increasing the total number
                             * of entries in the chainTable, guaranteeing the
                             * DDSS chain table will fit into the space
                             * allocated for the regular one. */
                            break;
                        }
                    }
                    chainTable[chainPos++] = i;
                    count++;
                    if (i < tmpMinChain) {
                        break;
                    }
                    i = tmpChainTable[i - tmpMinChain];
                }
            } else {
                count = 0;
            }
            if (count) {
                tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
            } else {
                tmpHashTable[hashIdx] = 0;
            }
        }
        assert(chainPos <= chainSize); /* I believe this is guaranteed... */
    }

    /* move chain pointers into the last entry of each hash bucket */
    for (hashIdx = (1 << hashLog); hashIdx; ) {
        U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
        U32 const chainPackedPointer = tmpHashTable[hashIdx];
        U32 i;
        for (i = 0; i < cacheSize; i++) {
            hashTable[bucketIdx + i] = 0;
        }
        hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
    }

    /* fill the buckets of the hash table */
    for (idx = ms->nextToUpdate; idx < target; idx++) {
        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
                   << ZSTD_LAZY_DDSS_BUCKET_LOG;
        U32 i;
        /* Shift hash cache down 1. */
        for (i = cacheSize - 1; i; i--)
            hashTable[h + i] = hashTable[h + i - 1];
        hashTable[h] = idx;
    }

    ms->nextToUpdate = target;
}

/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
size_t ZSTD_HcFindBestMatch_generic (
                        ZSTD_matchState_t* ms,
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const chainTable = ms->chainTable;
    const U32 chainSize = (1 << cParams->chainLog);
    const U32 chainMask = chainSize-1;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const U32 curr = (U32)(ip-base);
    const U32 maxDistance = 1U << cParams->windowLog;
    const U32 lowestValid = ms->window.lowLimit;
    const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    const U32 isDictionary = (ms->loadedDictEnd != 0);
    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
    const U32 minChain = curr > chainSize ? curr - chainSize : 0;
    U32 nbAttempts = 1U << cParams->searchLog;
    size_t ml=4-1;

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
                         ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
    const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
                        ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;

    U32 matchIndex;

    if (dictMode == ZSTD_dedicatedDictSearch) {
        const U32* entry = &dms->hashTable[ddsIdx];
        PREFETCH_L1(entry);
    }

    /* HC4 match finder */
    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);

    for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
        size_t currentMl=0;
        if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
            const BYTE* const match = base + matchIndex;
            assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
            if (match[ml] == ip[ml])   /* potentially better */
                currentMl = ZSTD_count(ip, match, iLimit);
        } else {
            const BYTE* const match = dictBase + matchIndex;
            assert(match+4 <= dictEnd);
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
        }

        /* save best solution */
        if (currentMl > ml) {
            ml = currentMl;
            *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
        }

        if (matchIndex <= minChain) break;
        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
    }

    assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    if (dictMode == ZSTD_dedicatedDictSearch) {
        const U32 ddsLowestIndex  = dms->window.dictLimit;
        const BYTE* const ddsBase = dms->window.base;
        const BYTE* const ddsEnd  = dms->window.nextSrc;
        const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
        const U32 ddsIndexDelta   = dictLimit - ddsSize;
        const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
        const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
        U32 ddsAttempt;

        for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
            PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
        }

        {
            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
            U32 const chainIndex = chainPackedPointer >> 8;

            PREFETCH_L1(&dms->chainTable[chainIndex]);
        }

        for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
            size_t currentMl=0;
            const BYTE* match;
            matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
            match = ddsBase + matchIndex;

            if (!matchIndex) {
                return ml;
            }

            /* guaranteed by table construction */
            (void)ddsLowestIndex;
            assert(matchIndex >= ddsLowestIndex);
            assert(match+4 <= ddsEnd);
            if (MEM_read32(match) == MEM_read32(ip)) {
                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
            }

            /* save best solution */
            if (currentMl > ml) {
                ml = currentMl;
                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
                if (ip+currentMl == iLimit) {
                    /* best possible, avoids read overflow on next attempt */
                    return ml;
                }
            }
        }

        {
            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
            U32 chainIndex = chainPackedPointer >> 8;
            U32 const chainLength = chainPackedPointer & 0xFF;
            U32 const chainAttempts = nbAttempts - ddsAttempt;
            U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
            U32 chainAttempt;

            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
                PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
            }

            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
                size_t currentMl=0;
                const BYTE* match;
                matchIndex = dms->chainTable[chainIndex];
                match = ddsBase + matchIndex;

                /* guaranteed by table construction */
                assert(matchIndex >= ddsLowestIndex);
                assert(match+4 <= ddsEnd);
                if (MEM_read32(match) == MEM_read32(ip)) {
                    /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
                }

                /* save best solution */
                if (currentMl > ml) {
                    ml = currentMl;
                    *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
                    if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
                }
            }
        }
    } else if (dictMode == ZSTD_dictMatchState) {
        const U32* const dmsChainTable = dms->chainTable;
        const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
        const U32 dmsChainMask         = dmsChainSize - 1;
        const U32 dmsLowestIndex       = dms->window.dictLimit;
        const BYTE* const dmsBase      = dms->window.base;
        const BYTE* const dmsEnd       = dms->window.nextSrc;
        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
        const U32 dmsIndexDelta        = dictLimit - dmsSize;
        const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;

        matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];

        for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
            size_t currentMl=0;
            const BYTE* const match = dmsBase + matchIndex;
            assert(match+4 <= dmsEnd);
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;

            /* save best solution */
            if (currentMl > ml) {
                ml = currentMl;
                *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
            }

            if (matchIndex <= dmsMinChain) break;
            matchIndex = dmsChainTable[matchIndex & dmsChainMask];
        }
    }

    return ml;
}

FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
    }
}

static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
    }
}

static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
    }
}

FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
    }
}

/* *******************************
*  Common parser - lazy strategy
*********************************/
typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
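
/* ZSTD_compressBlock_lazy_generic() :
 * Shared parser for the greedy/lazy/lazy2/btlazy2 strategies (noDict, dictMatchState
 * and dedicatedDictSearch modes; extDict inputs use the _extDict variant below).
 * `depth` controls how far ahead the parser looks for a better match before emitting
 * a sequence : 0 = greedy, 1 = lazy, 2 = lazy2 (see the wrappers further below). */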
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_lazy_generic(
                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
                        U32 rep[ZSTD_REP_NUM],
                        const void* src, size_t srcSize,
                        const searchMethod_e searchMethod, const U32 depth,
                        ZSTD_dictMode_e const dictMode)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32 prefixLowestIndex = ms->window.dictLimit;
    const BYTE* const prefixLowest = base + prefixLowestIndex;

    typedef size_t (*searchMax_f)(
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);

    /*
     * This table is indexed first by the four ZSTD_dictMode_e values, and then
     * by the two searchMethod_e values. NULLs are placed for configurations
     * that should never occur (extDict modes go to the other implementation
     * below and there is no DDSS for binary tree search yet).
     */
    const searchMax_f searchFuncs[4][2] = {
        {
            ZSTD_HcFindBestMatch_selectMLS,
            ZSTD_BtFindBestMatch_selectMLS
        },
        {
            NULL,
            NULL
        },
        {
            ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
            ZSTD_BtFindBestMatch_dictMatchState_selectMLS
        },
        {
            ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
            NULL
        }
    };

    searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;

    const int isDMS = dictMode == ZSTD_dictMatchState;
    const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
    const int isDxS = isDMS || isDDS;
    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const U32 dictLowestIndex    = isDxS ? dms->window.dictLimit : 0;
    const BYTE* const dictBase   = isDxS ? dms->window.base : NULL;
    const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
    const BYTE* const dictEnd    = isDxS ? dms->window.nextSrc : NULL;
    const U32 dictIndexDelta     = isDxS ?
                                   prefixLowestIndex - (U32)(dictEnd - dictBase) :
                                   0;
    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));

    assert(searchMax != NULL);

    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);

    /* init */
    ip += (dictAndPrefixLength == 0);
    if (dictMode == ZSTD_noDict) {
        U32 const curr = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
        U32 const maxRep = curr - windowLow;
        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
    }
    if (isDxS) {
        /* dictMatchState repCode checks don't currently handle repCode == 0
         * disabling. */
        assert(offset_1 <= dictAndPrefixLength);
        assert(offset_2 <= dictAndPrefixLength);
    }

    /* Match Loop */
#if defined(__x86_64__)
    /* I've measured a random ~5% speed loss on levels 5 & 6 (greedy) when the
     * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
     */
    __asm__(".p2align 5");
#endif
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;

        /* check repCode */
        if (isDxS) {
            const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
            const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
                                && repIndex < prefixLowestIndex) ?
                                   dictBase + (repIndex - dictIndexDelta) :
                                   base + repIndex;
            if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
                && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
                const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                if (depth==0) goto _storeSequence;
            }
        }
        if ( dictMode == ZSTD_noDict
          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            if (depth==0) goto _storeSequence;
        }

        /* first search (depth 0) */
        {   size_t offsetFound = 999999999;
            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < 4) {
            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

        /* let's try to find a better solution */
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            if ( (dictMode == ZSTD_noDict)
              && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                int const gain2 = (int)(mlRep * 3);
                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                if ((mlRep >= 4) && (gain2 > gain1))
                    matchLength = mlRep, offset = 0, start = ip;
            }
            if (isDxS) {
                const U32 repIndex = (U32)(ip - base) - offset_1;
                const BYTE* repMatch = repIndex < prefixLowestIndex ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
                if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                    const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                    size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                    int const gain2 = (int)(mlRep * 3);
                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((mlRep >= 4) && (gain2 > gain1))
                        matchLength = mlRep, offset = 0, start = ip;
                }
            }
            {   size_t offset2=999999999;
                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= 4) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                if ( (dictMode == ZSTD_noDict)
                  && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                    size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                    int const gain2 = (int)(mlRep * 4);
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((mlRep >= 4) && (gain2 > gain1))
                        matchLength = mlRep, offset = 0, start = ip;
                }
                if (isDxS) {
                    const U32 repIndex = (U32)(ip - base) - offset_1;
                    const BYTE* repMatch = repIndex < prefixLowestIndex ?
                                   dictBase + (repIndex - dictIndexDelta) :
                                   base + repIndex;
                    if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
                        && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                        const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                        size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                        int const gain2 = (int)(mlRep * 4);
                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                        if ((mlRep >= 4) && (gain2 > gain1))
                            matchLength = mlRep, offset = 0, start = ip;
                    }
                }
                {   size_t offset2=999999999;
                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= 4) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }

        /* NOTE:
         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
         * overflows the pointer, which is undefined behavior.
         */
        /* catch up */
        if (offset) {
            if (dictMode == ZSTD_noDict) {
                while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
                     && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
                    { start--; matchLength++; }
            }
            if (isDxS) {
                U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
                const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
                const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
                while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
            }
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }
        /* store sequence */
_storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        if (isDxS) {
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex = current2 - offset_2;
                const BYTE* repMatch = repIndex < prefixLowestIndex ?
                        dictBase - dictIndexDelta + repIndex :
                        base + repIndex;
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
                   && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                    matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                    ip += matchLength;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }

        if (dictMode == ZSTD_noDict) {
            while ( ((ip <= ilimit) & (offset_2>0))
                 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
                /* store sequence */
                matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap repcodes */
                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
    }   }   }

    /* Save reps for next block */
    rep[0] = offset_1 ? offset_1 : savedOffset;
    rep[1] = offset_2 ? offset_2 : savedOffset;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

size_t ZSTD_compressBlock_btlazy2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
}

size_t ZSTD_compressBlock_lazy2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
}

size_t ZSTD_compressBlock_lazy(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}

size_t ZSTD_compressBlock_greedy(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btlazy2_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_lazy2_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_lazy_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_greedy_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
}

size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
}

size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
}
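
/* ZSTD_compressBlock_lazy_extDict_generic() :
 * Same lazy parser, specialized for windows that still reference an external
 * (non-contiguous) segment : candidates below dictLimit live in dictBase, so matches
 * may span the dictionary/prefix boundary and are measured with ZSTD_count_2segments(). */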
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_lazy_extDict_generic(
                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
                        U32 rep[ZSTD_REP_NUM],
                        const void* src, size_t srcSize,
                        const searchMethod_e searchMethod, const U32 depth)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictEnd  = dictBase + dictLimit;
    const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
    const U32 windowLog = ms->cParams.windowLog;

    typedef size_t (*searchMax_f)(
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;

    U32 offset_1 = rep[0], offset_2 = rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");

    /* init */
    ip += (ip == prefixStart);

    /* Match Loop */
#if defined(__x86_64__)
    /* I've measured a random ~5% speed loss on levels 5 & 6 (greedy) when the
     * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
     */
    __asm__(".p2align 5");
#endif
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;
        U32 curr = (U32)(ip-base);

        /* check repCode */
        {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
            const U32 repIndex = (U32)(curr+1 - offset_1);
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                /* repcode detected, we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                if (depth==0) goto _storeSequence;
        }   }

        /* first search (depth 0) */
        {   size_t offsetFound = 999999999;
            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < 4) {
            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

        /* let's try to find a better solution */
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            curr++;
            /* check repCode */
            if (offset) {
                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
                const U32 repIndex = (U32)(curr - offset_1);
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
                if (MEM_read32(ip) == MEM_read32(repMatch)) {
                    /* repcode detected */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                    int const gain2 = (int)(repLength * 3);
                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((repLength >= 4) && (gain2 > gain1))
                        matchLength = repLength, offset = 0, start = ip;
            }   }
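            /* Gain heuristic below: 4*length minus ZSTD_highbit32(offset+1)
             * roughly approximates a sequence's benefit, and the +4 bias on
             * gain1 favors keeping the match already found. For example, if the
             * current match variables hold matchLength = 7 and offset ~ 1000
             * (gain1 = 28 - 9 + 4 = 23), a new match must score higher, e.g.
             * 8 bytes at an offset below 128 (gain2 = 32 - 7 = 25). */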
            /* search match, depth 1 */
            {   size_t offset2=999999999;
                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= 4) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                curr++;
                /* check repCode */
                if (offset) {
                    const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
                    const U32 repIndex = (U32)(curr - offset_1);
                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                    const BYTE* const repMatch = repBase + repIndex;
                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
                        /* repcode detected */
                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                        int const gain2 = (int)(repLength * 4);
                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                        if ((repLength >= 4) && (gain2 > gain1))
                            matchLength = repLength, offset = 0, start = ip;
                }   }

                /* search match, depth 2 */
                {   size_t offset2=999999999;
                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= 4) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }
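        /* At this point start/matchLength/offset describe the chosen match.
         * offset == 0 means the repcode offset_1 was selected; otherwise
         * `offset` is the match distance biased by ZSTD_REP_MOVE, so the true
         * distance is offset - ZSTD_REP_MOVE. The catch-up step extends the
         * match backwards while preceding bytes also match, staying within
         * the match's own segment (bounded by mStart). */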
        /* catch up */
        if (offset) {
            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }

        /* store sequence */
_storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }
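        /* After emitting a sequence, greedily consume immediate repeats of
         * offset_2 at the new position: each one is stored as a zero-literal
         * repcode sequence and the offset history is rotated. */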
        /* check immediate repcode */
        while (ip <= ilimit) {
            const U32 repCurrent = (U32)(ip-base);
            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
            const U32 repIndex = repCurrent - offset_2;
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
            if (MEM_read32(ip) == MEM_read32(repMatch)) {
                /* repcode detected, we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
            break;
    }   }

    /* Save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}
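
/* The wrappers below instantiate the extDict template with the
 * (searchMethod, depth) pair of each strategy:
 * greedy (hashChain, 0), lazy (hashChain, 1), lazy2 (hashChain, 2),
 * btlazy2 (binaryTree, 2). */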
size_t ZSTD_compressBlock_greedy_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
}

size_t ZSTD_compressBlock_lazy_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
}

size_t ZSTD_compressBlock_lazy2_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
}

size_t ZSTD_compressBlock_btlazy2_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}