zstd_lazy.c 36 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824
  1. /*
  2. * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. #include "zstd_compress_internal.h"
  11. #include "zstd_lazy.h"
  12. /*-*************************************
  13. * Binary Tree search
  14. ***************************************/
  15. void ZSTD_updateDUBT(
  16. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  17. const BYTE* ip, const BYTE* iend,
  18. U32 mls)
  19. {
  20. U32* const hashTable = ms->hashTable;
  21. U32 const hashLog = cParams->hashLog;
  22. U32* const bt = ms->chainTable;
  23. U32 const btLog = cParams->chainLog - 1;
  24. U32 const btMask = (1 << btLog) - 1;
  25. const BYTE* const base = ms->window.base;
  26. U32 const target = (U32)(ip - base);
  27. U32 idx = ms->nextToUpdate;
  28. if (idx != target)
  29. DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
  30. idx, target, ms->window.dictLimit);
  31. assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */
  32. (void)iend;
  33. assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */
  34. for ( ; idx < target ; idx++) {
  35. size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */
  36. U32 const matchIndex = hashTable[h];
  37. U32* const nextCandidatePtr = bt + 2*(idx&btMask);
  38. U32* const sortMarkPtr = nextCandidatePtr + 1;
  39. DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
  40. hashTable[h] = idx; /* Update Hash Table */
  41. *nextCandidatePtr = matchIndex; /* update BT like a chain */
  42. *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
  43. }
  44. ms->nextToUpdate = target;
  45. }
  46. /** ZSTD_insertDUBT1() :
  47. * sort one already inserted but unsorted position
  48. * assumption : current >= btlow == (current - btmask)
  49. * doesn't fail */
  50. static void ZSTD_insertDUBT1(
  51. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  52. U32 current, const BYTE* inputEnd,
  53. U32 nbCompares, U32 btLow, int extDict)
  54. {
  55. U32* const bt = ms->chainTable;
  56. U32 const btLog = cParams->chainLog - 1;
  57. U32 const btMask = (1 << btLog) - 1;
  58. size_t commonLengthSmaller=0, commonLengthLarger=0;
  59. const BYTE* const base = ms->window.base;
  60. const BYTE* const dictBase = ms->window.dictBase;
  61. const U32 dictLimit = ms->window.dictLimit;
  62. const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
  63. const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
  64. const BYTE* const dictEnd = dictBase + dictLimit;
  65. const BYTE* const prefixStart = base + dictLimit;
  66. const BYTE* match;
  67. U32* smallerPtr = bt + 2*(current&btMask);
  68. U32* largerPtr = smallerPtr + 1;
  69. U32 matchIndex = *smallerPtr;
  70. U32 dummy32; /* to be nullified at the end */
  71. U32 const windowLow = ms->window.lowLimit;
  72. DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
  73. current, dictLimit, windowLow);
  74. assert(current >= btLow);
  75. assert(ip < iend); /* condition for ZSTD_count */
  76. while (nbCompares-- && (matchIndex > windowLow)) {
  77. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  78. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  79. assert(matchIndex < current);
  80. if ( (!extDict)
  81. || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
  82. || (current < dictLimit) /* both in extDict */) {
  83. const BYTE* const mBase = !extDict || ((matchIndex+matchLength) >= dictLimit) ? base : dictBase;
  84. assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
  85. || (current < dictLimit) );
  86. match = mBase + matchIndex;
  87. matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
  88. } else {
  89. match = dictBase + matchIndex;
  90. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  91. if (matchIndex+matchLength >= dictLimit)
  92. match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
  93. }
  94. DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
  95. current, matchIndex, (U32)matchLength);
  96. if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
  97. break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
  98. }
  99. if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
  100. /* match is smaller than current */
  101. *smallerPtr = matchIndex; /* update smaller idx */
  102. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  103. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  104. DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
  105. matchIndex, btLow, nextPtr[1]);
  106. smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
  107. matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
  108. } else {
  109. /* match is larger than current */
  110. *largerPtr = matchIndex;
  111. commonLengthLarger = matchLength;
  112. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  113. DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
  114. matchIndex, btLow, nextPtr[0]);
  115. largerPtr = nextPtr;
  116. matchIndex = nextPtr[0];
  117. } }
  118. *smallerPtr = *largerPtr = 0;
  119. }
  120. static size_t ZSTD_DUBT_findBestMatch (
  121. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  122. const BYTE* const ip, const BYTE* const iend,
  123. size_t* offsetPtr,
  124. U32 const mls,
  125. U32 const extDict)
  126. {
  127. U32* const hashTable = ms->hashTable;
  128. U32 const hashLog = cParams->hashLog;
  129. size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
  130. U32 matchIndex = hashTable[h];
  131. const BYTE* const base = ms->window.base;
  132. U32 const current = (U32)(ip-base);
  133. U32 const windowLow = ms->window.lowLimit;
  134. U32* const bt = ms->chainTable;
  135. U32 const btLog = cParams->chainLog - 1;
  136. U32 const btMask = (1 << btLog) - 1;
  137. U32 const btLow = (btMask >= current) ? 0 : current - btMask;
  138. U32 const unsortLimit = MAX(btLow, windowLow);
  139. U32* nextCandidate = bt + 2*(matchIndex&btMask);
  140. U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
  141. U32 nbCompares = 1U << cParams->searchLog;
  142. U32 nbCandidates = nbCompares;
  143. U32 previousCandidate = 0;
  144. DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
  145. assert(ip <= iend-8); /* required for h calculation */
  146. /* reach end of unsorted candidates list */
  147. while ( (matchIndex > unsortLimit)
  148. && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
  149. && (nbCandidates > 1) ) {
  150. DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
  151. matchIndex);
  152. *unsortedMark = previousCandidate;
  153. previousCandidate = matchIndex;
  154. matchIndex = *nextCandidate;
  155. nextCandidate = bt + 2*(matchIndex&btMask);
  156. unsortedMark = bt + 2*(matchIndex&btMask) + 1;
  157. nbCandidates --;
  158. }
  159. if ( (matchIndex > unsortLimit)
  160. && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
  161. DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
  162. matchIndex);
  163. *nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */
  164. }
  165. /* batch sort stacked candidates */
  166. matchIndex = previousCandidate;
  167. while (matchIndex) { /* will end on matchIndex == 0 */
  168. U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
  169. U32 const nextCandidateIdx = *nextCandidateIdxPtr;
  170. ZSTD_insertDUBT1(ms, cParams, matchIndex, iend,
  171. nbCandidates, unsortLimit, extDict);
  172. matchIndex = nextCandidateIdx;
  173. nbCandidates++;
  174. }
  175. /* find longest match */
  176. { size_t commonLengthSmaller=0, commonLengthLarger=0;
  177. const BYTE* const dictBase = ms->window.dictBase;
  178. const U32 dictLimit = ms->window.dictLimit;
  179. const BYTE* const dictEnd = dictBase + dictLimit;
  180. const BYTE* const prefixStart = base + dictLimit;
  181. U32* smallerPtr = bt + 2*(current&btMask);
  182. U32* largerPtr = bt + 2*(current&btMask) + 1;
  183. U32 matchEndIdx = current+8+1;
  184. U32 dummy32; /* to be nullified at the end */
  185. size_t bestLength = 0;
  186. matchIndex = hashTable[h];
  187. hashTable[h] = current; /* Update Hash Table */
  188. while (nbCompares-- && (matchIndex > windowLow)) {
  189. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  190. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  191. const BYTE* match;
  192. if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
  193. match = base + matchIndex;
  194. matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
  195. } else {
  196. match = dictBase + matchIndex;
  197. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  198. if (matchIndex+matchLength >= dictLimit)
  199. match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
  200. }
  201. if (matchLength > bestLength) {
  202. if (matchLength > matchEndIdx - matchIndex)
  203. matchEndIdx = matchIndex + (U32)matchLength;
  204. if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
  205. bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
  206. if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
  207. break; /* drop, to guarantee consistency (miss a little bit of compression) */
  208. }
  209. }
  210. if (match[matchLength] < ip[matchLength]) {
  211. /* match is smaller than current */
  212. *smallerPtr = matchIndex; /* update smaller idx */
  213. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  214. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  215. smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
  216. matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
  217. } else {
  218. /* match is larger than current */
  219. *largerPtr = matchIndex;
  220. commonLengthLarger = matchLength;
  221. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  222. largerPtr = nextPtr;
  223. matchIndex = nextPtr[0];
  224. } }
  225. *smallerPtr = *largerPtr = 0;
  226. assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
  227. ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
  228. if (bestLength >= MINMATCH) {
  229. U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
  230. DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
  231. current, (U32)bestLength, (U32)*offsetPtr, mIndex);
  232. }
  233. return bestLength;
  234. }
  235. }
  236. /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
  237. static size_t ZSTD_BtFindBestMatch (
  238. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  239. const BYTE* const ip, const BYTE* const iLimit,
  240. size_t* offsetPtr,
  241. const U32 mls /* template */)
  242. {
  243. DEBUGLOG(7, "ZSTD_BtFindBestMatch");
  244. if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
  245. ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
  246. return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 0);
  247. }
  248. static size_t ZSTD_BtFindBestMatch_selectMLS (
  249. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  250. const BYTE* ip, const BYTE* const iLimit,
  251. size_t* offsetPtr)
  252. {
  253. switch(cParams->searchLength)
  254. {
  255. default : /* includes case 3 */
  256. case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4);
  257. case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5);
  258. case 7 :
  259. case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6);
  260. }
  261. }
  262. /** Tree updater, providing best match */
  263. static size_t ZSTD_BtFindBestMatch_extDict (
  264. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  265. const BYTE* const ip, const BYTE* const iLimit,
  266. size_t* offsetPtr,
  267. const U32 mls)
  268. {
  269. DEBUGLOG(7, "ZSTD_BtFindBestMatch_extDict");
  270. if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
  271. ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
  272. return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 1);
  273. }
  274. static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
  275. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  276. const BYTE* ip, const BYTE* const iLimit,
  277. size_t* offsetPtr)
  278. {
  279. switch(cParams->searchLength)
  280. {
  281. default : /* includes case 3 */
  282. case 4 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 4);
  283. case 5 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 5);
  284. case 7 :
  285. case 6 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 6);
  286. }
  287. }
  288. /* *********************************
  289. * Hash Chain
  290. ***********************************/
  291. #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
  292. /* Update chains up to ip (excluded)
  293. Assumption : always within prefix (i.e. not within extDict) */
  294. static U32 ZSTD_insertAndFindFirstIndex_internal(
  295. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  296. const BYTE* ip, U32 const mls)
  297. {
  298. U32* const hashTable = ms->hashTable;
  299. const U32 hashLog = cParams->hashLog;
  300. U32* const chainTable = ms->chainTable;
  301. const U32 chainMask = (1 << cParams->chainLog) - 1;
  302. const BYTE* const base = ms->window.base;
  303. const U32 target = (U32)(ip - base);
  304. U32 idx = ms->nextToUpdate;
  305. while(idx < target) { /* catch up */
  306. size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
  307. NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
  308. hashTable[h] = idx;
  309. idx++;
  310. }
  311. ms->nextToUpdate = target;
  312. return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
  313. }
  314. U32 ZSTD_insertAndFindFirstIndex(
  315. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  316. const BYTE* ip)
  317. {
  318. return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, cParams->searchLength);
  319. }
  320. /* inlining is important to hardwire a hot branch (template emulation) */
  321. FORCE_INLINE_TEMPLATE
  322. size_t ZSTD_HcFindBestMatch_generic (
  323. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  324. const BYTE* const ip, const BYTE* const iLimit,
  325. size_t* offsetPtr,
  326. const U32 mls, const U32 extDict)
  327. {
  328. U32* const chainTable = ms->chainTable;
  329. const U32 chainSize = (1 << cParams->chainLog);
  330. const U32 chainMask = chainSize-1;
  331. const BYTE* const base = ms->window.base;
  332. const BYTE* const dictBase = ms->window.dictBase;
  333. const U32 dictLimit = ms->window.dictLimit;
  334. const BYTE* const prefixStart = base + dictLimit;
  335. const BYTE* const dictEnd = dictBase + dictLimit;
  336. const U32 lowLimit = ms->window.lowLimit;
  337. const U32 current = (U32)(ip-base);
  338. const U32 minChain = current > chainSize ? current - chainSize : 0;
  339. U32 nbAttempts = 1U << cParams->searchLog;
  340. size_t ml=4-1;
  341. /* HC4 match finder */
  342. U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
  343. for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
  344. size_t currentMl=0;
  345. if ((!extDict) || matchIndex >= dictLimit) {
  346. const BYTE* const match = base + matchIndex;
  347. if (match[ml] == ip[ml]) /* potentially better */
  348. currentMl = ZSTD_count(ip, match, iLimit);
  349. } else {
  350. const BYTE* const match = dictBase + matchIndex;
  351. assert(match+4 <= dictEnd);
  352. if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
  353. currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
  354. }
  355. /* save best solution */
  356. if (currentMl > ml) {
  357. ml = currentMl;
  358. *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
  359. if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
  360. }
  361. if (matchIndex <= minChain) break;
  362. matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
  363. }
  364. return ml;
  365. }
  366. FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
  367. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  368. const BYTE* ip, const BYTE* const iLimit,
  369. size_t* offsetPtr)
  370. {
  371. switch(cParams->searchLength)
  372. {
  373. default : /* includes case 3 */
  374. case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 0);
  375. case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 0);
  376. case 7 :
  377. case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 0);
  378. }
  379. }
  380. FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
  381. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  382. const BYTE* ip, const BYTE* const iLimit,
  383. size_t* const offsetPtr)
  384. {
  385. switch(cParams->searchLength)
  386. {
  387. default : /* includes case 3 */
  388. case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 1);
  389. case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 1);
  390. case 7 :
  391. case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 1);
  392. }
  393. }
  394. /* *******************************
  395. * Common parser - lazy strategy
  396. *********************************/
  397. FORCE_INLINE_TEMPLATE
  398. size_t ZSTD_compressBlock_lazy_generic(
  399. ZSTD_matchState_t* ms, seqStore_t* seqStore,
  400. U32 rep[ZSTD_REP_NUM],
  401. ZSTD_compressionParameters const* cParams,
  402. const void* src, size_t srcSize,
  403. const U32 searchMethod, const U32 depth)
  404. {
  405. const BYTE* const istart = (const BYTE*)src;
  406. const BYTE* ip = istart;
  407. const BYTE* anchor = istart;
  408. const BYTE* const iend = istart + srcSize;
  409. const BYTE* const ilimit = iend - 8;
  410. const BYTE* const base = ms->window.base + ms->window.dictLimit;
  411. typedef size_t (*searchMax_f)(
  412. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  413. const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
  414. searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
  415. U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
  416. /* init */
  417. ip += (ip==base);
  418. ms->nextToUpdate3 = ms->nextToUpdate;
  419. { U32 const maxRep = (U32)(ip-base);
  420. if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
  421. if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
  422. }
  423. /* Match Loop */
  424. while (ip < ilimit) {
  425. size_t matchLength=0;
  426. size_t offset=0;
  427. const BYTE* start=ip+1;
  428. /* check repCode */
  429. if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
  430. /* repcode : we take it */
  431. matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
  432. if (depth==0) goto _storeSequence;
  433. }
  434. /* first search (depth 0) */
  435. { size_t offsetFound = 99999999;
  436. size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
  437. if (ml2 > matchLength)
  438. matchLength = ml2, start = ip, offset=offsetFound;
  439. }
  440. if (matchLength < 4) {
  441. ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
  442. continue;
  443. }
  444. /* let's try to find a better solution */
  445. if (depth>=1)
  446. while (ip<ilimit) {
  447. ip ++;
  448. if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
  449. size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
  450. int const gain2 = (int)(mlRep * 3);
  451. int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  452. if ((mlRep >= 4) && (gain2 > gain1))
  453. matchLength = mlRep, offset = 0, start = ip;
  454. }
  455. { size_t offset2=99999999;
  456. size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
  457. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  458. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
  459. if ((ml2 >= 4) && (gain2 > gain1)) {
  460. matchLength = ml2, offset = offset2, start = ip;
  461. continue; /* search a better one */
  462. } }
  463. /* let's find an even better one */
  464. if ((depth==2) && (ip<ilimit)) {
  465. ip ++;
  466. if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
  467. size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
  468. int const gain2 = (int)(ml2 * 4);
  469. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  470. if ((ml2 >= 4) && (gain2 > gain1))
  471. matchLength = ml2, offset = 0, start = ip;
  472. }
  473. { size_t offset2=99999999;
  474. size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
  475. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  476. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
  477. if ((ml2 >= 4) && (gain2 > gain1)) {
  478. matchLength = ml2, offset = offset2, start = ip;
  479. continue;
  480. } } }
  481. break; /* nothing found : store previous solution */
  482. }
  483. /* NOTE:
  484. * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
  485. * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
  486. * overflows the pointer, which is undefined behavior.
  487. */
  488. /* catch up */
  489. if (offset) {
  490. while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > base))
  491. && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
  492. { start--; matchLength++; }
  493. offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
  494. }
  495. /* store sequence */
  496. _storeSequence:
  497. { size_t const litLength = start - anchor;
  498. ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
  499. anchor = ip = start + matchLength;
  500. }
  501. /* check immediate repcode */
  502. while ( ((ip <= ilimit) & (offset_2>0))
  503. && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
  504. /* store sequence */
  505. matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
  506. offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
  507. ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
  508. ip += matchLength;
  509. anchor = ip;
  510. continue; /* faster when present ... (?) */
  511. } }
  512. /* Save reps for next block */
  513. rep[0] = offset_1 ? offset_1 : savedOffset;
  514. rep[1] = offset_2 ? offset_2 : savedOffset;
  515. /* Return the last literals size */
  516. return iend - anchor;
  517. }
  518. size_t ZSTD_compressBlock_btlazy2(
  519. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  520. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  521. {
  522. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
  523. }
  524. size_t ZSTD_compressBlock_lazy2(
  525. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  526. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  527. {
  528. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
  529. }
  530. size_t ZSTD_compressBlock_lazy(
  531. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  532. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  533. {
  534. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
  535. }
  536. size_t ZSTD_compressBlock_greedy(
  537. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  538. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  539. {
  540. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
  541. }
  542. FORCE_INLINE_TEMPLATE
  543. size_t ZSTD_compressBlock_lazy_extDict_generic(
  544. ZSTD_matchState_t* ms, seqStore_t* seqStore,
  545. U32 rep[ZSTD_REP_NUM],
  546. ZSTD_compressionParameters const* cParams,
  547. const void* src, size_t srcSize,
  548. const U32 searchMethod, const U32 depth)
  549. {
  550. const BYTE* const istart = (const BYTE*)src;
  551. const BYTE* ip = istart;
  552. const BYTE* anchor = istart;
  553. const BYTE* const iend = istart + srcSize;
  554. const BYTE* const ilimit = iend - 8;
  555. const BYTE* const base = ms->window.base;
  556. const U32 dictLimit = ms->window.dictLimit;
  557. const U32 lowestIndex = ms->window.lowLimit;
  558. const BYTE* const prefixStart = base + dictLimit;
  559. const BYTE* const dictBase = ms->window.dictBase;
  560. const BYTE* const dictEnd = dictBase + dictLimit;
  561. const BYTE* const dictStart = dictBase + lowestIndex;
  562. typedef size_t (*searchMax_f)(
  563. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  564. const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
  565. searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
  566. U32 offset_1 = rep[0], offset_2 = rep[1];
  567. /* init */
  568. ms->nextToUpdate3 = ms->nextToUpdate;
  569. ip += (ip == prefixStart);
  570. /* Match Loop */
  571. while (ip < ilimit) {
  572. size_t matchLength=0;
  573. size_t offset=0;
  574. const BYTE* start=ip+1;
  575. U32 current = (U32)(ip-base);
  576. /* check repCode */
  577. { const U32 repIndex = (U32)(current+1 - offset_1);
  578. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  579. const BYTE* const repMatch = repBase + repIndex;
  580. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  581. if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
  582. /* repcode detected we should take it */
  583. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  584. matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  585. if (depth==0) goto _storeSequence;
  586. } }
  587. /* first search (depth 0) */
  588. { size_t offsetFound = 99999999;
  589. size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
  590. if (ml2 > matchLength)
  591. matchLength = ml2, start = ip, offset=offsetFound;
  592. }
  593. if (matchLength < 4) {
  594. ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
  595. continue;
  596. }
  597. /* let's try to find a better solution */
  598. if (depth>=1)
  599. while (ip<ilimit) {
  600. ip ++;
  601. current++;
  602. /* check repCode */
  603. if (offset) {
  604. const U32 repIndex = (U32)(current - offset_1);
  605. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  606. const BYTE* const repMatch = repBase + repIndex;
  607. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  608. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  609. /* repcode detected */
  610. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  611. size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  612. int const gain2 = (int)(repLength * 3);
  613. int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  614. if ((repLength >= 4) && (gain2 > gain1))
  615. matchLength = repLength, offset = 0, start = ip;
  616. } }
  617. /* search match, depth 1 */
  618. { size_t offset2=99999999;
  619. size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
  620. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  621. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
  622. if ((ml2 >= 4) && (gain2 > gain1)) {
  623. matchLength = ml2, offset = offset2, start = ip;
  624. continue; /* search a better one */
  625. } }
  626. /* let's find an even better one */
  627. if ((depth==2) && (ip<ilimit)) {
  628. ip ++;
  629. current++;
  630. /* check repCode */
  631. if (offset) {
  632. const U32 repIndex = (U32)(current - offset_1);
  633. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  634. const BYTE* const repMatch = repBase + repIndex;
  635. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  636. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  637. /* repcode detected */
  638. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  639. size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  640. int const gain2 = (int)(repLength * 4);
  641. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  642. if ((repLength >= 4) && (gain2 > gain1))
  643. matchLength = repLength, offset = 0, start = ip;
  644. } }
  645. /* search match, depth 2 */
  646. { size_t offset2=99999999;
  647. size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
  648. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  649. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
  650. if ((ml2 >= 4) && (gain2 > gain1)) {
  651. matchLength = ml2, offset = offset2, start = ip;
  652. continue;
  653. } } }
  654. break; /* nothing found : store previous solution */
  655. }
  656. /* catch up */
  657. if (offset) {
  658. U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
  659. const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
  660. const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
  661. while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
  662. offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
  663. }
  664. /* store sequence */
  665. _storeSequence:
  666. { size_t const litLength = start - anchor;
  667. ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
  668. anchor = ip = start + matchLength;
  669. }
  670. /* check immediate repcode */
  671. while (ip <= ilimit) {
  672. const U32 repIndex = (U32)((ip-base) - offset_2);
  673. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  674. const BYTE* const repMatch = repBase + repIndex;
  675. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  676. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  677. /* repcode detected we should take it */
  678. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  679. matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  680. offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
  681. ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
  682. ip += matchLength;
  683. anchor = ip;
  684. continue; /* faster when present ... (?) */
  685. }
  686. break;
  687. } }
  688. /* Save reps for next block */
  689. rep[0] = offset_1;
  690. rep[1] = offset_2;
  691. /* Return the last literals size */
  692. return iend - anchor;
  693. }
  694. size_t ZSTD_compressBlock_greedy_extDict(
  695. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  696. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  697. {
  698. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
  699. }
  700. size_t ZSTD_compressBlock_lazy_extDict(
  701. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  702. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  703. {
  704. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
  705. }
  706. size_t ZSTD_compressBlock_lazy2_extDict(
  707. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  708. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  709. {
  710. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
  711. }
  712. size_t ZSTD_compressBlock_btlazy2_extDict(
  713. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  714. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  715. {
  716. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
  717. }