zstd_opt.c 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923
  1. /*
  2. * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. #include "zstd_compress_internal.h"
  11. #include "zstd_opt.h"
  12. #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
  13. #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
  14. #define ZSTD_MAX_PRICE (1<<30)
  15. /*-*************************************
  16. * Price functions for optimal parser
  17. ***************************************/
  18. static void ZSTD_setLog2Prices(optState_t* optPtr)
  19. {
  20. optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
  21. optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
  22. optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
  23. optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
  24. }
  25. static void ZSTD_rescaleFreqs(optState_t* const optPtr,
  26. const BYTE* const src, size_t const srcSize)
  27. {
  28. optPtr->staticPrices = 0;
  29. if (optPtr->litLengthSum == 0) { /* first init */
  30. unsigned u;
  31. if (srcSize <= 1024) optPtr->staticPrices = 1;
  32. assert(optPtr->litFreq!=NULL);
  33. for (u=0; u<=MaxLit; u++)
  34. optPtr->litFreq[u] = 0;
  35. for (u=0; u<srcSize; u++)
  36. optPtr->litFreq[src[u]]++;
  37. optPtr->litSum = 0;
  38. for (u=0; u<=MaxLit; u++) {
  39. optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
  40. optPtr->litSum += optPtr->litFreq[u];
  41. }
  42. for (u=0; u<=MaxLL; u++)
  43. optPtr->litLengthFreq[u] = 1;
  44. optPtr->litLengthSum = MaxLL+1;
  45. for (u=0; u<=MaxML; u++)
  46. optPtr->matchLengthFreq[u] = 1;
  47. optPtr->matchLengthSum = MaxML+1;
  48. for (u=0; u<=MaxOff; u++)
  49. optPtr->offCodeFreq[u] = 1;
  50. optPtr->offCodeSum = (MaxOff+1);
  51. } else {
  52. unsigned u;
  53. optPtr->litSum = 0;
  54. for (u=0; u<=MaxLit; u++) {
  55. optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
  56. optPtr->litSum += optPtr->litFreq[u];
  57. }
  58. optPtr->litLengthSum = 0;
  59. for (u=0; u<=MaxLL; u++) {
  60. optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
  61. optPtr->litLengthSum += optPtr->litLengthFreq[u];
  62. }
  63. optPtr->matchLengthSum = 0;
  64. for (u=0; u<=MaxML; u++) {
  65. optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
  66. optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
  67. }
  68. optPtr->offCodeSum = 0;
  69. for (u=0; u<=MaxOff; u++) {
  70. optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
  71. optPtr->offCodeSum += optPtr->offCodeFreq[u];
  72. }
  73. }
  74. ZSTD_setLog2Prices(optPtr);
  75. }
  76. /* ZSTD_rawLiteralsCost() :
  77. * cost of literals (only) in given segment (which length can be null)
  78. * does not include cost of literalLength symbol */
  79. static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
  80. const optState_t* const optPtr)
  81. {
  82. if (optPtr->staticPrices) return (litLength*6); /* 6 bit per literal - no statistic used */
  83. if (litLength == 0) return 0;
  84. /* literals */
  85. { U32 u;
  86. U32 cost = litLength * optPtr->log2litSum;
  87. for (u=0; u < litLength; u++)
  88. cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
  89. return cost;
  90. }
  91. }
  92. /* ZSTD_litLengthPrice() :
  93. * cost of literalLength symbol */
  94. static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
  95. {
  96. if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);
  97. /* literal Length */
  98. { U32 const llCode = ZSTD_LLcode(litLength);
  99. U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
  100. return price;
  101. }
  102. }
  103. /* ZSTD_litLengthPrice() :
  104. * cost of the literal part of a sequence,
  105. * including literals themselves, and literalLength symbol */
  106. static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
  107. const optState_t* const optPtr)
  108. {
  109. return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
  110. + ZSTD_litLengthPrice(litLength, optPtr);
  111. }
  112. /* ZSTD_litLengthContribution() :
  113. * @return ( cost(litlength) - cost(0) )
  114. * this value can then be added to rawLiteralsCost()
  115. * to provide a cost which is directly comparable to a match ending at same position */
  116. static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
  117. {
  118. if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);
  119. /* literal Length */
  120. { U32 const llCode = ZSTD_LLcode(litLength);
  121. int const contribution = LL_bits[llCode]
  122. + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
  123. - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
  124. #if 1
  125. return contribution;
  126. #else
  127. return MAX(0, contribution); /* sometimes better, sometimes not ... */
  128. #endif
  129. }
  130. }
  131. /* ZSTD_literalsContribution() :
  132. * creates a fake cost for the literals part of a sequence
  133. * which can be compared to the ending cost of a match
  134. * should a new match start at this position */
  135. static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
  136. const optState_t* const optPtr)
  137. {
  138. int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
  139. + ZSTD_litLengthContribution(litLength, optPtr);
  140. return contribution;
  141. }
  142. /* ZSTD_getMatchPrice() :
  143. * Provides the cost of the match part (offset + matchLength) of a sequence
  144. * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
  145. * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
  146. FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
  147. U32 const offset, U32 const matchLength,
  148. const optState_t* const optPtr,
  149. int const optLevel)
  150. {
  151. U32 price;
  152. U32 const offCode = ZSTD_highbit32(offset+1);
  153. U32 const mlBase = matchLength - MINMATCH;
  154. assert(matchLength >= MINMATCH);
  155. if (optPtr->staticPrices) /* fixed scheme, do not use statistics */
  156. return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
  157. price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
  158. if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
  159. /* match Length */
  160. { U32 const mlCode = ZSTD_MLcode(mlBase);
  161. price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
  162. }
  163. DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
  164. return price;
  165. }
  166. static void ZSTD_updateStats(optState_t* const optPtr,
  167. U32 litLength, const BYTE* literals,
  168. U32 offsetCode, U32 matchLength)
  169. {
  170. /* literals */
  171. { U32 u;
  172. for (u=0; u < litLength; u++)
  173. optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
  174. optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
  175. }
  176. /* literal Length */
  177. { U32 const llCode = ZSTD_LLcode(litLength);
  178. optPtr->litLengthFreq[llCode]++;
  179. optPtr->litLengthSum++;
  180. }
  181. /* match offset code (0-2=>repCode; 3+=>offset+2) */
  182. { U32 const offCode = ZSTD_highbit32(offsetCode+1);
  183. assert(offCode <= MaxOff);
  184. optPtr->offCodeFreq[offCode]++;
  185. optPtr->offCodeSum++;
  186. }
  187. /* match Length */
  188. { U32 const mlBase = matchLength - MINMATCH;
  189. U32 const mlCode = ZSTD_MLcode(mlBase);
  190. optPtr->matchLengthFreq[mlCode]++;
  191. optPtr->matchLengthSum++;
  192. }
  193. }
  194. /* ZSTD_readMINMATCH() :
  195. * function safe only for comparisons
  196. * assumption : memPtr must be at least 4 bytes before end of buffer */
  197. MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
  198. {
  199. switch (length)
  200. {
  201. default :
  202. case 4 : return MEM_read32(memPtr);
  203. case 3 : if (MEM_isLittleEndian())
  204. return MEM_read32(memPtr)<<8;
  205. else
  206. return MEM_read32(memPtr)>>8;
  207. }
  208. }
  209. /* Update hashTable3 up to ip (excluded)
  210. Assumption : always within prefix (i.e. not within extDict) */
  211. static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
  212. {
  213. U32* const hashTable3 = ms->hashTable3;
  214. U32 const hashLog3 = ms->hashLog3;
  215. const BYTE* const base = ms->window.base;
  216. U32 idx = ms->nextToUpdate3;
  217. U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
  218. size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
  219. assert(hashLog3 > 0);
  220. while(idx < target) {
  221. hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
  222. idx++;
  223. }
  224. return hashTable3[hash3];
  225. }
  226. /*-*************************************
  227. * Binary Tree search
  228. ***************************************/
  229. /** ZSTD_insertBt1() : add one or multiple positions to tree.
  230. * ip : assumed <= iend-8 .
  231. * @return : nb of positions added */
  232. static U32 ZSTD_insertBt1(
  233. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  234. const BYTE* const ip, const BYTE* const iend,
  235. U32 const mls, U32 const extDict)
  236. {
  237. U32* const hashTable = ms->hashTable;
  238. U32 const hashLog = cParams->hashLog;
  239. size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
  240. U32* const bt = ms->chainTable;
  241. U32 const btLog = cParams->chainLog - 1;
  242. U32 const btMask = (1 << btLog) - 1;
  243. U32 matchIndex = hashTable[h];
  244. size_t commonLengthSmaller=0, commonLengthLarger=0;
  245. const BYTE* const base = ms->window.base;
  246. const BYTE* const dictBase = ms->window.dictBase;
  247. const U32 dictLimit = ms->window.dictLimit;
  248. const BYTE* const dictEnd = dictBase + dictLimit;
  249. const BYTE* const prefixStart = base + dictLimit;
  250. const BYTE* match;
  251. const U32 current = (U32)(ip-base);
  252. const U32 btLow = btMask >= current ? 0 : current - btMask;
  253. U32* smallerPtr = bt + 2*(current&btMask);
  254. U32* largerPtr = smallerPtr + 1;
  255. U32 dummy32; /* to be nullified at the end */
  256. U32 const windowLow = ms->window.lowLimit;
  257. U32 matchEndIdx = current+8+1;
  258. size_t bestLength = 8;
  259. U32 nbCompares = 1U << cParams->searchLog;
  260. #ifdef ZSTD_C_PREDICT
  261. U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
  262. U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
  263. predictedSmall += (predictedSmall>0);
  264. predictedLarge += (predictedLarge>0);
  265. #endif /* ZSTD_C_PREDICT */
  266. DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
  267. assert(ip <= iend-8); /* required for h calculation */
  268. hashTable[h] = current; /* Update Hash Table */
  269. while (nbCompares-- && (matchIndex > windowLow)) {
  270. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  271. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  272. assert(matchIndex < current);
  273. #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
  274. const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
  275. if (matchIndex == predictedSmall) {
  276. /* no need to check length, result known */
  277. *smallerPtr = matchIndex;
  278. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  279. smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
  280. matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
  281. predictedSmall = predictPtr[1] + (predictPtr[1]>0);
  282. continue;
  283. }
  284. if (matchIndex == predictedLarge) {
  285. *largerPtr = matchIndex;
  286. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  287. largerPtr = nextPtr;
  288. matchIndex = nextPtr[0];
  289. predictedLarge = predictPtr[0] + (predictPtr[0]>0);
  290. continue;
  291. }
  292. #endif
  293. if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
  294. assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */
  295. match = base + matchIndex;
  296. matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
  297. } else {
  298. match = dictBase + matchIndex;
  299. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  300. if (matchIndex+matchLength >= dictLimit)
  301. match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
  302. }
  303. if (matchLength > bestLength) {
  304. bestLength = matchLength;
  305. if (matchLength > matchEndIdx - matchIndex)
  306. matchEndIdx = matchIndex + (U32)matchLength;
  307. }
  308. if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
  309. break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
  310. }
  311. if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
  312. /* match is smaller than current */
  313. *smallerPtr = matchIndex; /* update smaller idx */
  314. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  315. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  316. smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
  317. matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
  318. } else {
  319. /* match is larger than current */
  320. *largerPtr = matchIndex;
  321. commonLengthLarger = matchLength;
  322. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  323. largerPtr = nextPtr;
  324. matchIndex = nextPtr[0];
  325. } }
  326. *smallerPtr = *largerPtr = 0;
  327. if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
  328. assert(matchEndIdx > current + 8);
  329. return matchEndIdx - (current + 8);
  330. }
  331. FORCE_INLINE_TEMPLATE
  332. void ZSTD_updateTree_internal(
  333. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  334. const BYTE* const ip, const BYTE* const iend,
  335. const U32 mls, const U32 extDict)
  336. {
  337. const BYTE* const base = ms->window.base;
  338. U32 const target = (U32)(ip - base);
  339. U32 idx = ms->nextToUpdate;
  340. DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
  341. idx, target, extDict);
  342. while(idx < target)
  343. idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, extDict);
  344. ms->nextToUpdate = target;
  345. }
  346. void ZSTD_updateTree(
  347. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  348. const BYTE* ip, const BYTE* iend)
  349. {
  350. ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, 0 /*extDict*/);
  351. }
  352. FORCE_INLINE_TEMPLATE
  353. U32 ZSTD_insertBtAndGetAllMatches (
  354. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  355. const BYTE* const ip, const BYTE* const iLimit, int const extDict,
  356. U32 rep[ZSTD_REP_NUM], U32 const ll0,
  357. ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
  358. {
  359. U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
  360. const BYTE* const base = ms->window.base;
  361. U32 const current = (U32)(ip-base);
  362. U32 const hashLog = cParams->hashLog;
  363. U32 const minMatch = (mls==3) ? 3 : 4;
  364. U32* const hashTable = ms->hashTable;
  365. size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
  366. U32 matchIndex = hashTable[h];
  367. U32* const bt = ms->chainTable;
  368. U32 const btLog = cParams->chainLog - 1;
  369. U32 const btMask= (1U << btLog) - 1;
  370. size_t commonLengthSmaller=0, commonLengthLarger=0;
  371. const BYTE* const dictBase = ms->window.dictBase;
  372. U32 const dictLimit = ms->window.dictLimit;
  373. const BYTE* const dictEnd = dictBase + dictLimit;
  374. const BYTE* const prefixStart = base + dictLimit;
  375. U32 const btLow = btMask >= current ? 0 : current - btMask;
  376. U32 const windowLow = ms->window.lowLimit;
  377. U32* smallerPtr = bt + 2*(current&btMask);
  378. U32* largerPtr = bt + 2*(current&btMask) + 1;
  379. U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
  380. U32 dummy32; /* to be nullified at the end */
  381. U32 mnum = 0;
  382. U32 nbCompares = 1U << cParams->searchLog;
  383. size_t bestLength = lengthToBeat-1;
  384. DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
  385. /* check repCode */
  386. { U32 const lastR = ZSTD_REP_NUM + ll0;
  387. U32 repCode;
  388. for (repCode = ll0; repCode < lastR; repCode++) {
  389. U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
  390. U32 const repIndex = current - repOffset;
  391. U32 repLen = 0;
  392. assert(current >= dictLimit);
  393. if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
  394. if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
  395. repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
  396. }
  397. } else { /* repIndex < dictLimit || repIndex >= current */
  398. const BYTE* const repMatch = dictBase + repIndex;
  399. assert(current >= windowLow);
  400. if ( extDict /* this case only valid in extDict mode */
  401. && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
  402. & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
  403. && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
  404. repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
  405. } }
  406. /* save longer solution */
  407. if (repLen > bestLength) {
  408. DEBUGLOG(8, "found rep-match %u of length %u",
  409. repCode - ll0, (U32)repLen);
  410. bestLength = repLen;
  411. matches[mnum].off = repCode - ll0;
  412. matches[mnum].len = (U32)repLen;
  413. mnum++;
  414. if ( (repLen > sufficient_len)
  415. | (ip+repLen == iLimit) ) { /* best possible */
  416. return mnum;
  417. } } } }
  418. /* HC3 match finder */
  419. if ((mls == 3) /*static*/ && (bestLength < mls)) {
  420. U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
  421. if ((matchIndex3 > windowLow)
  422. & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
  423. size_t mlen;
  424. if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) {
  425. const BYTE* const match = base + matchIndex3;
  426. mlen = ZSTD_count(ip, match, iLimit);
  427. } else {
  428. const BYTE* const match = dictBase + matchIndex3;
  429. mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
  430. }
  431. /* save best solution */
  432. if (mlen >= mls /* == 3 > bestLength */) {
  433. DEBUGLOG(8, "found small match with hlog3, of length %u",
  434. (U32)mlen);
  435. bestLength = mlen;
  436. assert(current > matchIndex3);
  437. assert(mnum==0); /* no prior solution */
  438. matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
  439. matches[0].len = (U32)mlen;
  440. mnum = 1;
  441. if ( (mlen > sufficient_len) |
  442. (ip+mlen == iLimit) ) { /* best possible length */
  443. ms->nextToUpdate = current+1; /* skip insertion */
  444. return 1;
  445. } } } }
  446. hashTable[h] = current; /* Update Hash Table */
  447. while (nbCompares-- && (matchIndex > windowLow)) {
  448. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  449. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  450. const BYTE* match;
  451. assert(current > matchIndex);
  452. if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
  453. assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
  454. match = base + matchIndex;
  455. matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
  456. } else {
  457. match = dictBase + matchIndex;
  458. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
  459. if (matchIndex+matchLength >= dictLimit)
  460. match = base + matchIndex; /* prepare for match[matchLength] */
  461. }
  462. if (matchLength > bestLength) {
  463. DEBUGLOG(8, "found match of length %u at distance %u",
  464. (U32)matchLength, current - matchIndex);
  465. assert(matchEndIdx > matchIndex);
  466. if (matchLength > matchEndIdx - matchIndex)
  467. matchEndIdx = matchIndex + (U32)matchLength;
  468. bestLength = matchLength;
  469. matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
  470. matches[mnum].len = (U32)matchLength;
  471. mnum++;
  472. if (matchLength > ZSTD_OPT_NUM) break;
  473. if (ip+matchLength == iLimit) { /* equal : no way to know if inf or sup */
  474. break; /* drop, to preserve bt consistency (miss a little bit of compression) */
  475. }
  476. }
  477. if (match[matchLength] < ip[matchLength]) {
  478. /* match smaller than current */
  479. *smallerPtr = matchIndex; /* update smaller idx */
  480. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  481. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  482. smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
  483. matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
  484. } else {
  485. *largerPtr = matchIndex;
  486. commonLengthLarger = matchLength;
  487. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  488. largerPtr = nextPtr;
  489. matchIndex = nextPtr[0];
  490. } }
  491. *smallerPtr = *largerPtr = 0;
  492. assert(matchEndIdx > current+8);
  493. ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
  494. return mnum;
  495. }
  496. FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
  497. ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
  498. const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
  499. U32 rep[ZSTD_REP_NUM], U32 const ll0,
  500. ZSTD_match_t* matches, U32 const lengthToBeat)
  501. {
  502. U32 const matchLengthSearch = cParams->searchLength;
  503. DEBUGLOG(7, "ZSTD_BtGetAllMatches");
  504. if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
  505. ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict);
  506. switch(matchLengthSearch)
  507. {
  508. case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 3);
  509. default :
  510. case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 4);
  511. case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 5);
  512. case 7 :
  513. case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 6);
  514. }
  515. }
  516. /*-*******************************
  517. * Optimal parser
  518. *********************************/
  519. typedef struct repcodes_s {
  520. U32 rep[3];
  521. } repcodes_t;
  522. repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
  523. {
  524. repcodes_t newReps;
  525. if (offset >= ZSTD_REP_NUM) { /* full offset */
  526. newReps.rep[2] = rep[1];
  527. newReps.rep[1] = rep[0];
  528. newReps.rep[0] = offset - ZSTD_REP_MOVE;
  529. } else { /* repcode */
  530. U32 const repCode = offset + ll0;
  531. if (repCode > 0) { /* note : if repCode==0, no change */
  532. U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
  533. newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
  534. newReps.rep[1] = rep[0];
  535. newReps.rep[0] = currentOffset;
  536. } else { /* repCode == 0 */
  537. memcpy(&newReps, rep, sizeof(newReps));
  538. }
  539. }
  540. return newReps;
  541. }
  542. typedef struct {
  543. const BYTE* anchor;
  544. U32 litlen;
  545. U32 rawLitCost;
  546. } cachedLiteralPrice_t;
  547. static U32 ZSTD_rawLiteralsCost_cached(
  548. cachedLiteralPrice_t* const cachedLitPrice,
  549. const BYTE* const anchor, U32 const litlen,
  550. const optState_t* const optStatePtr)
  551. {
  552. U32 startCost;
  553. U32 remainingLength;
  554. const BYTE* startPosition;
  555. if (anchor == cachedLitPrice->anchor) {
  556. startCost = cachedLitPrice->rawLitCost;
  557. startPosition = anchor + cachedLitPrice->litlen;
  558. assert(litlen >= cachedLitPrice->litlen);
  559. remainingLength = litlen - cachedLitPrice->litlen;
  560. } else {
  561. startCost = 0;
  562. startPosition = anchor;
  563. remainingLength = litlen;
  564. }
  565. { U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
  566. cachedLitPrice->anchor = anchor;
  567. cachedLitPrice->litlen = litlen;
  568. cachedLitPrice->rawLitCost = rawLitCost;
  569. return rawLitCost;
  570. }
  571. }
  572. static U32 ZSTD_fullLiteralsCost_cached(
  573. cachedLiteralPrice_t* const cachedLitPrice,
  574. const BYTE* const anchor, U32 const litlen,
  575. const optState_t* const optStatePtr)
  576. {
  577. return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
  578. + ZSTD_litLengthPrice(litlen, optStatePtr);
  579. }
  580. static int ZSTD_literalsContribution_cached(
  581. cachedLiteralPrice_t* const cachedLitPrice,
  582. const BYTE* const anchor, U32 const litlen,
  583. const optState_t* const optStatePtr)
  584. {
  585. int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
  586. + ZSTD_litLengthContribution(litlen, optStatePtr);
  587. return contribution;
  588. }
  589. FORCE_INLINE_TEMPLATE
  590. size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore,
  591. U32 rep[ZSTD_REP_NUM],
  592. ZSTD_compressionParameters const* cParams,
  593. const void* src, size_t srcSize,
  594. const int optLevel, const int extDict)
  595. {
  596. optState_t* const optStatePtr = &ms->opt;
  597. const BYTE* const istart = (const BYTE*)src;
  598. const BYTE* ip = istart;
  599. const BYTE* anchor = istart;
  600. const BYTE* const iend = istart + srcSize;
  601. const BYTE* const ilimit = iend - 8;
  602. const BYTE* const base = ms->window.base;
  603. const BYTE* const prefixStart = base + ms->window.dictLimit;
  604. U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
  605. U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
  606. ZSTD_optimal_t* const opt = optStatePtr->priceTable;
  607. ZSTD_match_t* const matches = optStatePtr->matchTable;
  608. cachedLiteralPrice_t cachedLitPrice;
  609. /* init */
  610. DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
  611. ms->nextToUpdate3 = ms->nextToUpdate;
  612. ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
  613. ip += (ip==prefixStart);
  614. memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
  615. /* Match Loop */
  616. while (ip < ilimit) {
  617. U32 cur, last_pos = 0;
  618. U32 best_mlen, best_off;
  619. /* find first match */
  620. { U32 const litlen = (U32)(ip - anchor);
  621. U32 const ll0 = !litlen;
  622. U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, extDict, rep, ll0, matches, minMatch);
  623. if (!nbMatches) { ip++; continue; }
  624. /* initialize opt[0] */
  625. { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
  626. opt[0].mlen = 1;
  627. opt[0].litlen = litlen;
  628. /* large match -> immediate encoding */
  629. { U32 const maxML = matches[nbMatches-1].len;
  630. DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie",
  631. nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));
  632. if (maxML > sufficient_len) {
  633. best_mlen = maxML;
  634. best_off = matches[nbMatches-1].off;
  635. DEBUGLOG(7, "large match (%u>%u), immediate encoding",
  636. best_mlen, sufficient_len);
  637. cur = 0;
  638. last_pos = 1;
  639. goto _shortestPath;
  640. } }
  641. /* set prices for first matches starting position == 0 */
  642. { U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
  643. U32 pos;
  644. U32 matchNb;
  645. for (pos = 0; pos < minMatch; pos++) {
  646. opt[pos].mlen = 1;
  647. opt[pos].price = ZSTD_MAX_PRICE;
  648. }
  649. for (matchNb = 0; matchNb < nbMatches; matchNb++) {
  650. U32 const offset = matches[matchNb].off;
  651. U32 const end = matches[matchNb].len;
  652. repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
  653. for ( ; pos <= end ; pos++ ) {
  654. U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
  655. DEBUGLOG(7, "rPos:%u => set initial price : %u",
  656. pos, matchPrice);
  657. opt[pos].mlen = pos;
  658. opt[pos].off = offset;
  659. opt[pos].litlen = litlen;
  660. opt[pos].price = matchPrice;
  661. memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
  662. } }
  663. last_pos = pos-1;
  664. }
  665. }
  666. /* check further positions */
  667. for (cur = 1; cur <= last_pos; cur++) {
  668. const BYTE* const inr = ip + cur;
  669. assert(cur < ZSTD_OPT_NUM);
  670. /* Fix current position with one literal if cheaper */
  671. { U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
  672. int price; /* note : contribution can be negative */
  673. if (cur > litlen) {
  674. price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
  675. } else {
  676. price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
  677. }
  678. assert(price < 1000000000); /* overflow check */
  679. if (price <= opt[cur].price) {
  680. DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
  681. cur, price, opt[cur].price);
  682. opt[cur].mlen = 1;
  683. opt[cur].off = 0;
  684. opt[cur].litlen = litlen;
  685. opt[cur].price = price;
  686. memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
  687. } }
  688. /* last match must start at a minimum distance of 8 from oend */
  689. if (inr > ilimit) continue;
  690. if (cur == last_pos) break;
  691. if ( (optLevel==0) /*static*/
  692. && (opt[cur+1].price <= opt[cur].price) )
  693. continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
  694. { U32 const ll0 = (opt[cur].mlen != 1);
  695. U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
  696. U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
  697. U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
  698. U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch);
  699. U32 matchNb;
  700. if (!nbMatches) continue;
  701. { U32 const maxML = matches[nbMatches-1].len;
  702. DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
  703. cur, nbMatches, maxML);
  704. if ( (maxML > sufficient_len)
  705. | (cur + maxML >= ZSTD_OPT_NUM) ) {
  706. best_mlen = maxML;
  707. best_off = matches[nbMatches-1].off;
  708. last_pos = cur + 1;
  709. goto _shortestPath;
  710. }
  711. }
  712. /* set prices using matches found at position == cur */
  713. for (matchNb = 0; matchNb < nbMatches; matchNb++) {
  714. U32 const offset = matches[matchNb].off;
  715. repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
  716. U32 const lastML = matches[matchNb].len;
  717. U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
  718. U32 mlen;
  719. DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
  720. matchNb, matches[matchNb].off, lastML, litlen);
  721. for (mlen = lastML; mlen >= startML; mlen--) {
  722. U32 const pos = cur + mlen;
  723. int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
  724. if ((pos > last_pos) || (price < opt[pos].price)) {
  725. DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
  726. pos, price, opt[pos].price);
  727. while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
  728. opt[pos].mlen = mlen;
  729. opt[pos].off = offset;
  730. opt[pos].litlen = litlen;
  731. opt[pos].price = price;
  732. memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
  733. } else {
  734. if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */
  735. }
  736. } } }
  737. } /* for (cur = 1; cur <= last_pos; cur++) */
  738. best_mlen = opt[last_pos].mlen;
  739. best_off = opt[last_pos].off;
  740. cur = last_pos - best_mlen;
  741. _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
  742. assert(opt[0].mlen == 1);
  743. /* reverse traversal */
  744. DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)",
  745. last_pos, cur);
  746. { U32 selectedMatchLength = best_mlen;
  747. U32 selectedOffset = best_off;
  748. U32 pos = cur;
  749. while (1) {
  750. U32 const mlen = opt[pos].mlen;
  751. U32 const off = opt[pos].off;
  752. opt[pos].mlen = selectedMatchLength;
  753. opt[pos].off = selectedOffset;
  754. selectedMatchLength = mlen;
  755. selectedOffset = off;
  756. if (mlen > pos) break;
  757. pos -= mlen;
  758. } }
  759. /* save sequences */
  760. { U32 pos;
  761. for (pos=0; pos < last_pos; ) {
  762. U32 const llen = (U32)(ip - anchor);
  763. U32 const mlen = opt[pos].mlen;
  764. U32 const offset = opt[pos].off;
  765. if (mlen == 1) { ip++; pos++; continue; } /* literal position => move on */
  766. pos += mlen; ip += mlen;
  767. /* repcodes update : like ZSTD_updateRep(), but update in place */
  768. if (offset >= ZSTD_REP_NUM) { /* full offset */
  769. rep[2] = rep[1];
  770. rep[1] = rep[0];
  771. rep[0] = offset - ZSTD_REP_MOVE;
  772. } else { /* repcode */
  773. U32 const repCode = offset + (llen==0);
  774. if (repCode) { /* note : if repCode==0, no change */
  775. U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
  776. if (repCode >= 2) rep[2] = rep[1];
  777. rep[1] = rep[0];
  778. rep[0] = currentOffset;
  779. }
  780. }
  781. ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
  782. ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH);
  783. anchor = ip;
  784. } }
  785. ZSTD_setLog2Prices(optStatePtr);
  786. } /* while (ip < ilimit) */
  787. /* Return the last literals size */
  788. return iend - anchor;
  789. }
  790. size_t ZSTD_compressBlock_btopt(
  791. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  792. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  793. {
  794. DEBUGLOG(5, "ZSTD_compressBlock_btopt");
  795. return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
  796. }
  797. size_t ZSTD_compressBlock_btultra(
  798. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  799. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  800. {
  801. return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
  802. }
  803. size_t ZSTD_compressBlock_btopt_extDict(
  804. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  805. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  806. {
  807. return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
  808. }
  809. size_t ZSTD_compressBlock_btultra_extDict(
  810. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  811. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  812. {
  813. return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
  814. }