zstd_ldm.c 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
  1. /*
  2. * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. */
  9. #include "zstd_ldm.h"
  10. #include "zstd_fast.h" /* ZSTD_fillHashTable() */
  11. #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
  12. #define LDM_BUCKET_SIZE_LOG 3
  13. #define LDM_MIN_MATCH_LENGTH 64
  14. #define LDM_HASH_RLOG 7
  15. #define LDM_HASH_CHAR_OFFSET 10
  16. void ZSTD_ldm_adjustParameters(ldmParams_t* params,
  17. ZSTD_compressionParameters const* cParams)
  18. {
  19. U32 const windowLog = cParams->windowLog;
  20. ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
  21. DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
  22. if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
  23. if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
  24. if (cParams->strategy >= ZSTD_btopt) {
  25. /* Get out of the way of the optimal parser */
  26. U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
  27. assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
  28. assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
  29. params->minMatchLength = minMatch;
  30. }
  31. if (params->hashLog == 0) {
  32. params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
  33. assert(params->hashLog <= ZSTD_HASHLOG_MAX);
  34. }
  35. if (params->hashEveryLog == 0) {
  36. params->hashEveryLog =
  37. windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
  38. }
  39. params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
  40. }
  41. size_t ZSTD_ldm_getTableSize(ldmParams_t params)
  42. {
  43. size_t const ldmHSize = ((size_t)1) << params.hashLog;
  44. size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
  45. size_t const ldmBucketSize =
  46. ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
  47. size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
  48. return params.enableLdm ? totalSize : 0;
  49. }
  50. size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
  51. {
  52. return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
  53. }
  54. /** ZSTD_ldm_getSmallHash() :
  55. * numBits should be <= 32
  56. * If numBits==0, returns 0.
  57. * @return : the most significant numBits of value. */
  58. static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
  59. {
  60. assert(numBits <= 32);
  61. return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
  62. }
  63. /** ZSTD_ldm_getChecksum() :
  64. * numBitsToDiscard should be <= 32
  65. * @return : the next most significant 32 bits after numBitsToDiscard */
  66. static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
  67. {
  68. assert(numBitsToDiscard <= 32);
  69. return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
  70. }
  71. /** ZSTD_ldm_getTag() ;
  72. * Given the hash, returns the most significant numTagBits bits
  73. * after (32 + hbits) bits.
  74. *
  75. * If there are not enough bits remaining, return the last
  76. * numTagBits bits. */
  77. static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
  78. {
  79. assert(numTagBits < 32 && hbits <= 32);
  80. if (32 - hbits < numTagBits) {
  81. return hash & (((U32)1 << numTagBits) - 1);
  82. } else {
  83. return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
  84. }
  85. }
  86. /** ZSTD_ldm_getBucket() :
  87. * Returns a pointer to the start of the bucket associated with hash. */
  88. static ldmEntry_t* ZSTD_ldm_getBucket(
  89. ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
  90. {
  91. return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
  92. }
  93. /** ZSTD_ldm_insertEntry() :
  94. * Insert the entry with corresponding hash into the hash table */
  95. static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
  96. size_t const hash, const ldmEntry_t entry,
  97. ldmParams_t const ldmParams)
  98. {
  99. BYTE* const bucketOffsets = ldmState->bucketOffsets;
  100. *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
  101. bucketOffsets[hash]++;
  102. bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
  103. }
  104. /** ZSTD_ldm_makeEntryAndInsertByTag() :
  105. *
  106. * Gets the small hash, checksum, and tag from the rollingHash.
  107. *
  108. * If the tag matches (1 << ldmParams.hashEveryLog)-1, then
  109. * creates an ldmEntry from the offset, and inserts it into the hash table.
  110. *
  111. * hBits is the length of the small hash, which is the most significant hBits
  112. * of rollingHash. The checksum is the next 32 most significant bits, followed
  113. * by ldmParams.hashEveryLog bits that make up the tag. */
  114. static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
  115. U64 const rollingHash,
  116. U32 const hBits,
  117. U32 const offset,
  118. ldmParams_t const ldmParams)
  119. {
  120. U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
  121. U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
  122. if (tag == tagMask) {
  123. U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
  124. U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
  125. ldmEntry_t entry;
  126. entry.offset = offset;
  127. entry.checksum = checksum;
  128. ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
  129. }
  130. }
  131. /** ZSTD_ldm_getRollingHash() :
  132. * Get a 64-bit hash using the first len bytes from buf.
  133. *
  134. * Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
  135. * H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
  136. *
  137. * where the constant a is defined to be prime8bytes.
  138. *
  139. * The implementation adds an offset to each byte, so
  140. * H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
  141. static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
  142. {
  143. U64 ret = 0;
  144. U32 i;
  145. for (i = 0; i < len; i++) {
  146. ret *= prime8bytes;
  147. ret += buf[i] + LDM_HASH_CHAR_OFFSET;
  148. }
  149. return ret;
  150. }
  151. /** ZSTD_ldm_ipow() :
  152. * Return base^exp. */
  153. static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
  154. {
  155. U64 ret = 1;
  156. while (exp) {
  157. if (exp & 1) { ret *= base; }
  158. exp >>= 1;
  159. base *= base;
  160. }
  161. return ret;
  162. }
  163. U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
  164. DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength);
  165. assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
  166. return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
  167. }
  168. /** ZSTD_ldm_updateHash() :
  169. * Updates hash by removing toRemove and adding toAdd. */
  170. static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
  171. {
  172. hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
  173. hash *= prime8bytes;
  174. hash += toAdd + LDM_HASH_CHAR_OFFSET;
  175. return hash;
  176. }
  177. /** ZSTD_ldm_countBackwardsMatch() :
  178. * Returns the number of bytes that match backwards before pIn and pMatch.
  179. *
  180. * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
  181. static size_t ZSTD_ldm_countBackwardsMatch(
  182. const BYTE* pIn, const BYTE* pAnchor,
  183. const BYTE* pMatch, const BYTE* pBase)
  184. {
  185. size_t matchLength = 0;
  186. while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
  187. pIn--;
  188. pMatch--;
  189. matchLength++;
  190. }
  191. return matchLength;
  192. }
  193. /** ZSTD_ldm_fillFastTables() :
  194. *
  195. * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
  196. * This is similar to ZSTD_loadDictionaryContent.
  197. *
  198. * The tables for the other strategies are filled within their
  199. * block compressors. */
  200. static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
  201. ZSTD_compressionParameters const* cParams,
  202. void const* end)
  203. {
  204. const BYTE* const iend = (const BYTE*)end;
  205. switch(cParams->strategy)
  206. {
  207. case ZSTD_fast:
  208. ZSTD_fillHashTable(ms, cParams, iend);
  209. ms->nextToUpdate = (U32)(iend - ms->window.base);
  210. break;
  211. case ZSTD_dfast:
  212. ZSTD_fillDoubleHashTable(ms, cParams, iend);
  213. ms->nextToUpdate = (U32)(iend - ms->window.base);
  214. break;
  215. case ZSTD_greedy:
  216. case ZSTD_lazy:
  217. case ZSTD_lazy2:
  218. case ZSTD_btlazy2:
  219. case ZSTD_btopt:
  220. case ZSTD_btultra:
  221. break;
  222. default:
  223. assert(0); /* not possible : not a valid strategy id */
  224. }
  225. return 0;
  226. }
  227. /** ZSTD_ldm_fillLdmHashTable() :
  228. *
  229. * Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
  230. * lastHash is the rolling hash that corresponds to lastHashed.
  231. *
  232. * Returns the rolling hash corresponding to position iend-1. */
  233. static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
  234. U64 lastHash, const BYTE* lastHashed,
  235. const BYTE* iend, const BYTE* base,
  236. U32 hBits, ldmParams_t const ldmParams)
  237. {
  238. U64 rollingHash = lastHash;
  239. const BYTE* cur = lastHashed + 1;
  240. while (cur < iend) {
  241. rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
  242. cur[ldmParams.minMatchLength-1],
  243. state->hashPower);
  244. ZSTD_ldm_makeEntryAndInsertByTag(state,
  245. rollingHash, hBits,
  246. (U32)(cur - base), ldmParams);
  247. ++cur;
  248. }
  249. return rollingHash;
  250. }
  251. /** ZSTD_ldm_limitTableUpdate() :
  252. *
  253. * Sets cctx->nextToUpdate to a position corresponding closer to anchor
  254. * if it is far way
  255. * (after a long match, only update tables a limited amount). */
  256. static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
  257. {
  258. U32 const current = (U32)(anchor - ms->window.base);
  259. if (current > ms->nextToUpdate + 1024) {
  260. ms->nextToUpdate =
  261. current - MIN(512, current - ms->nextToUpdate - 1024);
  262. }
  263. }
  264. static size_t ZSTD_ldm_generateSequences_internal(
  265. ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
  266. ldmParams_t const* params, void const* src, size_t srcSize)
  267. {
  268. /* LDM parameters */
  269. int const extDict = ZSTD_window_hasExtDict(ldmState->window);
  270. U32 const minMatchLength = params->minMatchLength;
  271. U64 const hashPower = ldmState->hashPower;
  272. U32 const hBits = params->hashLog - params->bucketSizeLog;
  273. U32 const ldmBucketSize = 1U << params->bucketSizeLog;
  274. U32 const hashEveryLog = params->hashEveryLog;
  275. U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
  276. /* Prefix and extDict parameters */
  277. U32 const dictLimit = ldmState->window.dictLimit;
  278. U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
  279. BYTE const* const base = ldmState->window.base;
  280. BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
  281. BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
  282. BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
  283. BYTE const* const lowPrefixPtr = base + dictLimit;
  284. /* Input bounds */
  285. BYTE const* const istart = (BYTE const*)src;
  286. BYTE const* const iend = istart + srcSize;
  287. BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
  288. /* Input positions */
  289. BYTE const* anchor = istart;
  290. BYTE const* ip = istart;
  291. /* Rolling hash */
  292. BYTE const* lastHashed = NULL;
  293. U64 rollingHash = 0;
  294. while (ip <= ilimit) {
  295. size_t mLength;
  296. U32 const current = (U32)(ip - base);
  297. size_t forwardMatchLength = 0, backwardMatchLength = 0;
  298. ldmEntry_t* bestEntry = NULL;
  299. if (ip != istart) {
  300. rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
  301. lastHashed[minMatchLength],
  302. hashPower);
  303. } else {
  304. rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength);
  305. }
  306. lastHashed = ip;
  307. /* Do not insert and do not look for a match */
  308. if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) {
  309. ip++;
  310. continue;
  311. }
  312. /* Get the best entry and compute the match lengths */
  313. {
  314. ldmEntry_t* const bucket =
  315. ZSTD_ldm_getBucket(ldmState,
  316. ZSTD_ldm_getSmallHash(rollingHash, hBits),
  317. *params);
  318. ldmEntry_t* cur;
  319. size_t bestMatchLength = 0;
  320. U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
  321. for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
  322. size_t curForwardMatchLength, curBackwardMatchLength,
  323. curTotalMatchLength;
  324. if (cur->checksum != checksum || cur->offset <= lowestIndex) {
  325. continue;
  326. }
  327. if (extDict) {
  328. BYTE const* const curMatchBase =
  329. cur->offset < dictLimit ? dictBase : base;
  330. BYTE const* const pMatch = curMatchBase + cur->offset;
  331. BYTE const* const matchEnd =
  332. cur->offset < dictLimit ? dictEnd : iend;
  333. BYTE const* const lowMatchPtr =
  334. cur->offset < dictLimit ? dictStart : lowPrefixPtr;
  335. curForwardMatchLength = ZSTD_count_2segments(
  336. ip, pMatch, iend,
  337. matchEnd, lowPrefixPtr);
  338. if (curForwardMatchLength < minMatchLength) {
  339. continue;
  340. }
  341. curBackwardMatchLength =
  342. ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
  343. lowMatchPtr);
  344. curTotalMatchLength = curForwardMatchLength +
  345. curBackwardMatchLength;
  346. } else { /* !extDict */
  347. BYTE const* const pMatch = base + cur->offset;
  348. curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
  349. if (curForwardMatchLength < minMatchLength) {
  350. continue;
  351. }
  352. curBackwardMatchLength =
  353. ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
  354. lowPrefixPtr);
  355. curTotalMatchLength = curForwardMatchLength +
  356. curBackwardMatchLength;
  357. }
  358. if (curTotalMatchLength > bestMatchLength) {
  359. bestMatchLength = curTotalMatchLength;
  360. forwardMatchLength = curForwardMatchLength;
  361. backwardMatchLength = curBackwardMatchLength;
  362. bestEntry = cur;
  363. }
  364. }
  365. }
  366. /* No match found -- continue searching */
  367. if (bestEntry == NULL) {
  368. ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
  369. hBits, current,
  370. *params);
  371. ip++;
  372. continue;
  373. }
  374. /* Match found */
  375. mLength = forwardMatchLength + backwardMatchLength;
  376. ip -= backwardMatchLength;
  377. {
  378. /* Store the sequence:
  379. * ip = current - backwardMatchLength
  380. * The match is at (bestEntry->offset - backwardMatchLength)
  381. */
  382. U32 const matchIndex = bestEntry->offset;
  383. U32 const offset = current - matchIndex;
  384. rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
  385. /* Out of sequence storage */
  386. if (rawSeqStore->size == rawSeqStore->capacity)
  387. return ERROR(dstSize_tooSmall);
  388. seq->litLength = (U32)(ip - anchor);
  389. seq->matchLength = (U32)mLength;
  390. seq->offset = offset;
  391. rawSeqStore->size++;
  392. }
  393. /* Insert the current entry into the hash table */
  394. ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
  395. (U32)(lastHashed - base),
  396. *params);
  397. assert(ip + backwardMatchLength == lastHashed);
  398. /* Fill the hash table from lastHashed+1 to ip+mLength*/
  399. /* Heuristic: don't need to fill the entire table at end of block */
  400. if (ip + mLength <= ilimit) {
  401. rollingHash = ZSTD_ldm_fillLdmHashTable(
  402. ldmState, rollingHash, lastHashed,
  403. ip + mLength, base, hBits, *params);
  404. lastHashed = ip + mLength - 1;
  405. }
  406. ip += mLength;
  407. anchor = ip;
  408. }
  409. return iend - anchor;
  410. }
  411. /*! ZSTD_ldm_reduceTable() :
  412. * reduce table indexes by `reducerValue` */
  413. static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
  414. U32 const reducerValue)
  415. {
  416. U32 u;
  417. for (u = 0; u < size; u++) {
  418. if (table[u].offset < reducerValue) table[u].offset = 0;
  419. else table[u].offset -= reducerValue;
  420. }
  421. }
  422. size_t ZSTD_ldm_generateSequences(
  423. ldmState_t* ldmState, rawSeqStore_t* sequences,
  424. ldmParams_t const* params, void const* src, size_t srcSize)
  425. {
  426. U32 const maxDist = 1U << params->windowLog;
  427. BYTE const* const istart = (BYTE const*)src;
  428. BYTE const* const iend = istart + srcSize;
  429. size_t const kMaxChunkSize = 1 << 20;
  430. size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
  431. size_t chunk;
  432. size_t leftoverSize = 0;
  433. assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
  434. /* Check that ZSTD_window_update() has been called for this chunk prior
  435. * to passing it to this function.
  436. */
  437. assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
  438. /* The input could be very large (in zstdmt), so it must be broken up into
  439. * chunks to enforce the maximmum distance and handle overflow correction.
  440. */
  441. assert(sequences->pos <= sequences->size);
  442. assert(sequences->size <= sequences->capacity);
  443. for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
  444. BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
  445. size_t const remaining = (size_t)(iend - chunkStart);
  446. BYTE const *const chunkEnd =
  447. (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
  448. size_t const chunkSize = chunkEnd - chunkStart;
  449. size_t newLeftoverSize;
  450. size_t const prevSize = sequences->size;
  451. assert(chunkStart < iend);
  452. /* 1. Perform overflow correction if necessary. */
  453. if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
  454. U32 const ldmHSize = 1U << params->hashLog;
  455. U32 const correction = ZSTD_window_correctOverflow(
  456. &ldmState->window, /* cycleLog */ 0, maxDist, src);
  457. ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
  458. }
  459. /* 2. We enforce the maximum offset allowed.
  460. *
  461. * kMaxChunkSize should be small enough that we don't lose too much of
  462. * the window through early invalidation.
  463. * TODO: * Test the chunk size.
  464. * * Try invalidation after the sequence generation and test the
  465. * the offset against maxDist directly.
  466. */
  467. ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
  468. /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
  469. newLeftoverSize = ZSTD_ldm_generateSequences_internal(
  470. ldmState, sequences, params, chunkStart, chunkSize);
  471. if (ZSTD_isError(newLeftoverSize))
  472. return newLeftoverSize;
  473. /* 4. We add the leftover literals from previous iterations to the first
  474. * newly generated sequence, or add the `newLeftoverSize` if none are
  475. * generated.
  476. */
  477. /* Prepend the leftover literals from the last call */
  478. if (prevSize < sequences->size) {
  479. sequences->seq[prevSize].litLength += (U32)leftoverSize;
  480. leftoverSize = newLeftoverSize;
  481. } else {
  482. assert(newLeftoverSize == chunkSize);
  483. leftoverSize += chunkSize;
  484. }
  485. }
  486. return 0;
  487. }
  488. void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
  489. while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
  490. rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
  491. if (srcSize <= seq->litLength) {
  492. /* Skip past srcSize literals */
  493. seq->litLength -= (U32)srcSize;
  494. return;
  495. }
  496. srcSize -= seq->litLength;
  497. seq->litLength = 0;
  498. if (srcSize < seq->matchLength) {
  499. /* Skip past the first srcSize of the match */
  500. seq->matchLength -= (U32)srcSize;
  501. if (seq->matchLength < minMatch) {
  502. /* The match is too short, omit it */
  503. if (rawSeqStore->pos + 1 < rawSeqStore->size) {
  504. seq[1].litLength += seq[0].matchLength;
  505. }
  506. rawSeqStore->pos++;
  507. }
  508. return;
  509. }
  510. srcSize -= seq->matchLength;
  511. seq->matchLength = 0;
  512. rawSeqStore->pos++;
  513. }
  514. }
  515. /**
  516. * If the sequence length is longer than remaining then the sequence is split
  517. * between this block and the next.
  518. *
  519. * Returns the current sequence to handle, or if the rest of the block should
  520. * be literals, it returns a sequence with offset == 0.
  521. */
  522. static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
  523. U32 const remaining, U32 const minMatch)
  524. {
  525. rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
  526. assert(sequence.offset > 0);
  527. /* Likely: No partial sequence */
  528. if (remaining >= sequence.litLength + sequence.matchLength) {
  529. rawSeqStore->pos++;
  530. return sequence;
  531. }
  532. /* Cut the sequence short (offset == 0 ==> rest is literals). */
  533. if (remaining <= sequence.litLength) {
  534. sequence.offset = 0;
  535. } else if (remaining < sequence.litLength + sequence.matchLength) {
  536. sequence.matchLength = remaining - sequence.litLength;
  537. if (sequence.matchLength < minMatch) {
  538. sequence.offset = 0;
  539. }
  540. }
  541. /* Skip past `remaining` bytes for the future sequences. */
  542. ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
  543. return sequence;
  544. }
  545. size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
  546. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  547. ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
  548. int const extDict)
  549. {
  550. unsigned const minMatch = cParams->searchLength;
  551. ZSTD_blockCompressor const blockCompressor =
  552. ZSTD_selectBlockCompressor(cParams->strategy, extDict);
  553. BYTE const* const base = ms->window.base;
  554. /* Input bounds */
  555. BYTE const* const istart = (BYTE const*)src;
  556. BYTE const* const iend = istart + srcSize;
  557. /* Input positions */
  558. BYTE const* ip = istart;
  559. assert(rawSeqStore->pos <= rawSeqStore->size);
  560. assert(rawSeqStore->size <= rawSeqStore->capacity);
  561. /* Loop through each sequence and apply the block compressor to the lits */
  562. while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
  563. /* maybeSplitSequence updates rawSeqStore->pos */
  564. rawSeq const sequence = maybeSplitSequence(rawSeqStore,
  565. (U32)(iend - ip), minMatch);
  566. int i;
  567. /* End signal */
  568. if (sequence.offset == 0)
  569. break;
  570. assert(sequence.offset <= (1U << cParams->windowLog));
  571. assert(ip + sequence.litLength + sequence.matchLength <= iend);
  572. /* Fill tables for block compressor */
  573. ZSTD_ldm_limitTableUpdate(ms, ip);
  574. ZSTD_ldm_fillFastTables(ms, cParams, ip);
  575. /* Run the block compressor */
  576. {
  577. size_t const newLitLength =
  578. blockCompressor(ms, seqStore, rep, cParams, ip,
  579. sequence.litLength);
  580. ip += sequence.litLength;
  581. ms->nextToUpdate = (U32)(ip - base);
  582. /* Update the repcodes */
  583. for (i = ZSTD_REP_NUM - 1; i > 0; i--)
  584. rep[i] = rep[i-1];
  585. rep[0] = sequence.offset;
  586. /* Store the sequence */
  587. ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
  588. sequence.offset + ZSTD_REP_MOVE,
  589. sequence.matchLength - MINMATCH);
  590. ip += sequence.matchLength;
  591. }
  592. }
  593. /* Fill the tables for the block compressor */
  594. ZSTD_ldm_limitTableUpdate(ms, ip);
  595. ZSTD_ldm_fillFastTables(ms, cParams, ip);
  596. /* Compress the last literals */
  597. {
  598. size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
  599. ip, iend - ip);
  600. ms->nextToUpdate = (U32)(iend - base);
  601. return lastLiterals;
  602. }
  603. }