lz4hc.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748
  1. /*
  2. LZ4 HC - High Compression Mode of LZ4
  3. Copyright (C) 2011-2015, Yann Collet.
  4. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  5. Redistribution and use in source and binary forms, with or without
  6. modification, are permitted provided that the following conditions are
  7. met:
  8. * Redistributions of source code must retain the above copyright
  9. notice, this list of conditions and the following disclaimer.
  10. * Redistributions in binary form must reproduce the above
  11. copyright notice, this list of conditions and the following disclaimer
  12. in the documentation and/or other materials provided with the
  13. distribution.
  14. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  15. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  16. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  17. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  18. OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  19. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  20. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. You can contact the author at :
  26. - LZ4 source repository : https://github.com/Cyan4973/lz4
  27. - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
  28. */
  29. /* *************************************
  30. * Tuning Parameter
  31. ***************************************/
  /* Level substituted by the compressor when the caller asks for a level below 1. */
  static const int LZ4HC_compressionLevel_default = 9;

  /*!
   * LZ4HC_HEAPMODE :
   * Chooses where the default compression function places its workspace :
   *   0 : on the stack (fastest)
   *   1 : on the heap (requires malloc())
   * The workspace is rather large, so heap mode is the recommended setting.
   */
  #define LZ4HC_HEAPMODE 0
  40. /* *************************************
  41. * Includes
  42. ***************************************/
  43. #include "lz4hc.h"
  44. /* *************************************
  45. * Local Compiler Options
  46. ***************************************/
  47. #if defined(__GNUC__)
  48. # pragma GCC diagnostic ignored "-Wunused-function"
  49. #endif
  50. #if defined (__clang__)
  51. # pragma clang diagnostic ignored "-Wunused-function"
  52. #endif
  53. /* *************************************
  54. * Common LZ4 definition
  55. ***************************************/
  56. #define LZ4_COMMONDEFS_ONLY
  57. #include "lz4.c"
  58. /* *************************************
  59. * Local Constants
  60. ***************************************/
  /* Size parameters of the match finder : window, hash table and chain table. */
  #define DICTIONARY_LOGSIZE 16
  #define MAXD               (1<<DICTIONARY_LOGSIZE)   /* number of chain-table entries */
  #define MAXD_MASK          (MAXD - 1)

  #define HASH_LOG           (DICTIONARY_LOGSIZE-1)
  #define HASHTABLESIZE      (1 << HASH_LOG)           /* number of hash-table entries */
  #define HASH_MASK          (HASHTABLESIZE - 1)

  /* Match length threshold used by the parser when trimming overlapping matches. */
  #define OPTIMAL_ML         (int)((ML_MASK-1)+MINMATCH)

  /* Upper bound applied to the requested compression level. */
  static const int g_maxCompressionLevel = 16;
  69. /**************************************
  70. * Local Types
  71. **************************************/
  72. typedef struct
  73. {
  74. U32 hashTable[HASHTABLESIZE];
  75. U16 chainTable[MAXD];
  76. const BYTE* end; /* next block here to continue on current prefix */
  77. const BYTE* base; /* All index relative to this position */
  78. const BYTE* dictBase; /* alternate base for extDict */
  79. BYTE* inputBuffer; /* deprecated */
  80. U32 dictLimit; /* below that point, need extDict */
  81. U32 lowLimit; /* below that point, no more dict */
  82. U32 nextToUpdate; /* index from which to continue dictionary update */
  83. U32 compressionLevel;
  84. } LZ4HC_Data_Structure;
  85. /**************************************
  86. * Local Macros
  87. **************************************/
  88. #define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
  89. //#define DELTANEXTU16(p) chainTable[(p) & MAXD_MASK] /* flexible, MAXD dependent */
  90. #define DELTANEXTU16(p) chainTable[(U16)(p)] /* faster */
  91. static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
  92. /**************************************
  93. * HC Compression
  94. **************************************/
  95. static void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* start)
  96. {
  97. MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
  98. MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
  99. hc4->nextToUpdate = 64 KB;
  100. hc4->base = start - 64 KB;
  101. hc4->end = start;
  102. hc4->dictBase = start - 64 KB;
  103. hc4->dictLimit = 64 KB;
  104. hc4->lowLimit = 64 KB;
  105. }
  106. /* Update chains up to ip (excluded) */
  107. FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
  108. {
  109. U16* chainTable = hc4->chainTable;
  110. U32* HashTable = hc4->hashTable;
  111. const BYTE* const base = hc4->base;
  112. const U32 target = (U32)(ip - base);
  113. U32 idx = hc4->nextToUpdate;
  114. while(idx < target)
  115. {
  116. U32 h = LZ4HC_hashPtr(base+idx);
  117. size_t delta = idx - HashTable[h];
  118. if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
  119. DELTANEXTU16(idx) = (U16)delta;
  120. HashTable[h] = idx;
  121. idx++;
  122. }
  123. hc4->nextToUpdate = target;
  124. }
  125. FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, /* Index table will be updated */
  126. const BYTE* ip, const BYTE* const iLimit,
  127. const BYTE** matchpos,
  128. const int maxNbAttempts)
  129. {
  130. U16* const chainTable = hc4->chainTable;
  131. U32* const HashTable = hc4->hashTable;
  132. const BYTE* const base = hc4->base;
  133. const BYTE* const dictBase = hc4->dictBase;
  134. const U32 dictLimit = hc4->dictLimit;
  135. const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
  136. U32 matchIndex;
  137. const BYTE* match;
  138. int nbAttempts=maxNbAttempts;
  139. size_t ml=0;
  140. /* HC4 match finder */
  141. LZ4HC_Insert(hc4, ip);
  142. matchIndex = HashTable[LZ4HC_hashPtr(ip)];
  143. while ((matchIndex>=lowLimit) && (nbAttempts))
  144. {
  145. nbAttempts--;
  146. if (matchIndex >= dictLimit)
  147. {
  148. match = base + matchIndex;
  149. if (*(match+ml) == *(ip+ml)
  150. && (LZ4_read32(match) == LZ4_read32(ip)))
  151. {
  152. size_t mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
  153. if (mlt > ml) { ml = mlt; *matchpos = match; }
  154. }
  155. }
  156. else
  157. {
  158. match = dictBase + matchIndex;
  159. if (LZ4_read32(match) == LZ4_read32(ip))
  160. {
  161. size_t mlt;
  162. const BYTE* vLimit = ip + (dictLimit - matchIndex);
  163. if (vLimit > iLimit) vLimit = iLimit;
  164. mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
  165. if ((ip+mlt == vLimit) && (vLimit < iLimit))
  166. mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
  167. if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */
  168. }
  169. }
  170. matchIndex -= DELTANEXTU16(matchIndex);
  171. }
  172. return (int)ml;
  173. }
  174. FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
  175. LZ4HC_Data_Structure* hc4,
  176. const BYTE* const ip,
  177. const BYTE* const iLowLimit,
  178. const BYTE* const iHighLimit,
  179. int longest,
  180. const BYTE** matchpos,
  181. const BYTE** startpos,
  182. const int maxNbAttempts)
  183. {
  184. U16* const chainTable = hc4->chainTable;
  185. U32* const HashTable = hc4->hashTable;
  186. const BYTE* const base = hc4->base;
  187. const U32 dictLimit = hc4->dictLimit;
  188. const BYTE* const lowPrefixPtr = base + dictLimit;
  189. const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
  190. const BYTE* const dictBase = hc4->dictBase;
  191. U32 matchIndex;
  192. int nbAttempts = maxNbAttempts;
  193. int delta = (int)(ip-iLowLimit);
  194. /* First Match */
  195. LZ4HC_Insert(hc4, ip);
  196. matchIndex = HashTable[LZ4HC_hashPtr(ip)];
  197. while ((matchIndex>=lowLimit) && (nbAttempts))
  198. {
  199. nbAttempts--;
  200. if (matchIndex >= dictLimit)
  201. {
  202. const BYTE* matchPtr = base + matchIndex;
  203. if (*(iLowLimit + longest) == *(matchPtr - delta + longest))
  204. if (LZ4_read32(matchPtr) == LZ4_read32(ip))
  205. {
  206. int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
  207. int back = 0;
  208. while ((ip+back>iLowLimit)
  209. && (matchPtr+back > lowPrefixPtr)
  210. && (ip[back-1] == matchPtr[back-1]))
  211. back--;
  212. mlt -= back;
  213. if (mlt > longest)
  214. {
  215. longest = (int)mlt;
  216. *matchpos = matchPtr+back;
  217. *startpos = ip+back;
  218. }
  219. }
  220. }
  221. else
  222. {
  223. const BYTE* matchPtr = dictBase + matchIndex;
  224. if (LZ4_read32(matchPtr) == LZ4_read32(ip))
  225. {
  226. size_t mlt;
  227. int back=0;
  228. const BYTE* vLimit = ip + (dictLimit - matchIndex);
  229. if (vLimit > iHighLimit) vLimit = iHighLimit;
  230. mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
  231. if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
  232. mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
  233. while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--;
  234. mlt -= back;
  235. if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
  236. }
  237. }
  238. matchIndex -= DELTANEXTU16(matchIndex);
  239. }
  240. return longest;
  241. }
  /* Tells the encoder whether writes must be checked against the end of the output buffer. */
  typedef enum
  {
      noLimit = 0,
      limitedOutput = 1
  } limitedOutput_directive;

  /* Set to 1 to compile the sequence trace emitted by LZ4HC_encodeSequence(). */
  #define LZ4HC_DEBUG 0
  #if LZ4HC_DEBUG
  static unsigned debug = 0;   /* run-time switch for the trace */
  #endif
  247. FORCE_INLINE int LZ4HC_encodeSequence (
  248. const BYTE** ip,
  249. BYTE** op,
  250. const BYTE** anchor,
  251. int matchLength,
  252. const BYTE* const match,
  253. limitedOutput_directive limitedOutputBuffer,
  254. BYTE* oend)
  255. {
  256. int length;
  257. BYTE* token;
  258. #if LZ4HC_DEBUG
  259. if (debug) printf("literal : %u -- match : %u -- offset : %u\n", (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match));
  260. #endif
  261. /* Encode Literal length */
  262. length = (int)(*ip - *anchor);
  263. token = (*op)++;
  264. if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */
  265. if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
  266. else *token = (BYTE)(length<<ML_BITS);
  267. /* Copy Literals */
  268. LZ4_wildCopy(*op, *anchor, (*op) + length);
  269. *op += length;
  270. /* Encode Offset */
  271. LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
  272. /* Encode MatchLength */
  273. length = (int)(matchLength-MINMATCH);
  274. if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */
  275. if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
  276. else *token += (BYTE)(length);
  277. /* Prepare next loop */
  278. *ip += matchLength;
  279. *anchor = *ip;
  280. return 0;
  281. }
  282. static int LZ4HC_compress_generic (
  283. void* ctxvoid,
  284. const char* source,
  285. char* dest,
  286. int inputSize,
  287. int maxOutputSize,
  288. int compressionLevel,
  289. limitedOutput_directive limit
  290. )
  291. {
  292. LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
  293. const BYTE* ip = (const BYTE*) source;
  294. const BYTE* anchor = ip;
  295. const BYTE* const iend = ip + inputSize;
  296. const BYTE* const mflimit = iend - MFLIMIT;
  297. const BYTE* const matchlimit = (iend - LASTLITERALS);
  298. BYTE* op = (BYTE*) dest;
  299. BYTE* const oend = op + maxOutputSize;
  300. unsigned maxNbAttempts;
  301. int ml, ml2, ml3, ml0;
  302. const BYTE* ref=NULL;
  303. const BYTE* start2=NULL;
  304. const BYTE* ref2=NULL;
  305. const BYTE* start3=NULL;
  306. const BYTE* ref3=NULL;
  307. const BYTE* start0;
  308. const BYTE* ref0;
  309. /* init */
  310. if (compressionLevel > g_maxCompressionLevel) compressionLevel = g_maxCompressionLevel;
  311. if (compressionLevel < 1) compressionLevel = LZ4HC_compressionLevel_default;
  312. maxNbAttempts = 1 << (compressionLevel-1);
  313. ctx->end += inputSize;
  314. ip++;
  315. /* Main Loop */
  316. while (ip < mflimit)
  317. {
  318. ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
  319. if (!ml) { ip++; continue; }
  320. /* saved, in case we would skip too much */
  321. start0 = ip;
  322. ref0 = ref;
  323. ml0 = ml;
  324. _Search2:
  325. if (ip+ml < mflimit)
  326. ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts);
  327. else ml2 = ml;
  328. if (ml2 == ml) /* No better match */
  329. {
  330. if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
  331. continue;
  332. }
  333. if (start0 < ip)
  334. {
  335. if (start2 < ip + ml0) /* empirical */
  336. {
  337. ip = start0;
  338. ref = ref0;
  339. ml = ml0;
  340. }
  341. }
  342. /* Here, start0==ip */
  343. if ((start2 - ip) < 3) /* First Match too small : removed */
  344. {
  345. ml = ml2;
  346. ip = start2;
  347. ref =ref2;
  348. goto _Search2;
  349. }
  350. _Search3:
  351. /*
  352. * Currently we have :
  353. * ml2 > ml1, and
  354. * ip1+3 <= ip2 (usually < ip1+ml1)
  355. */
  356. if ((start2 - ip) < OPTIMAL_ML)
  357. {
  358. int correction;
  359. int new_ml = ml;
  360. if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
  361. if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
  362. correction = new_ml - (int)(start2 - ip);
  363. if (correction > 0)
  364. {
  365. start2 += correction;
  366. ref2 += correction;
  367. ml2 -= correction;
  368. }
  369. }
  370. /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
  371. if (start2 + ml2 < mflimit)
  372. ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
  373. else ml3 = ml2;
  374. if (ml3 == ml2) /* No better match : 2 sequences to encode */
  375. {
  376. /* ip & ref are known; Now for ml */
  377. if (start2 < ip+ml) ml = (int)(start2 - ip);
  378. /* Now, encode 2 sequences */
  379. if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
  380. ip = start2;
  381. if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
  382. continue;
  383. }
  384. if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */
  385. {
  386. if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
  387. {
  388. if (start2 < ip+ml)
  389. {
  390. int correction = (int)(ip+ml - start2);
  391. start2 += correction;
  392. ref2 += correction;
  393. ml2 -= correction;
  394. if (ml2 < MINMATCH)
  395. {
  396. start2 = start3;
  397. ref2 = ref3;
  398. ml2 = ml3;
  399. }
  400. }
  401. if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
  402. ip = start3;
  403. ref = ref3;
  404. ml = ml3;
  405. start0 = start2;
  406. ref0 = ref2;
  407. ml0 = ml2;
  408. goto _Search2;
  409. }
  410. start2 = start3;
  411. ref2 = ref3;
  412. ml2 = ml3;
  413. goto _Search3;
  414. }
  415. /*
  416. * OK, now we have 3 ascending matches; let's write at least the first one
  417. * ip & ref are known; Now for ml
  418. */
  419. if (start2 < ip+ml)
  420. {
  421. if ((start2 - ip) < (int)ML_MASK)
  422. {
  423. int correction;
  424. if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
  425. if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
  426. correction = ml - (int)(start2 - ip);
  427. if (correction > 0)
  428. {
  429. start2 += correction;
  430. ref2 += correction;
  431. ml2 -= correction;
  432. }
  433. }
  434. else
  435. {
  436. ml = (int)(start2 - ip);
  437. }
  438. }
  439. if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
  440. ip = start2;
  441. ref = ref2;
  442. ml = ml2;
  443. start2 = start3;
  444. ref2 = ref3;
  445. ml2 = ml3;
  446. goto _Search3;
  447. }
  448. /* Encode Last Literals */
  449. {
  450. int lastRun = (int)(iend - anchor);
  451. if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */
  452. if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
  453. else *op++ = (BYTE)(lastRun<<ML_BITS);
  454. memcpy(op, anchor, iend - anchor);
  455. op += iend-anchor;
  456. }
  457. /* End */
  458. return (int) (((char*)op)-dest);
  459. }
  460. int LZ4_sizeofStateHC(void) { return sizeof(LZ4HC_Data_Structure); }
  461. int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
  462. {
  463. if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */
  464. LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)src);
  465. if (maxDstSize < LZ4_compressBound(srcSize))
  466. return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput);
  467. else
  468. return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, noLimit);
  469. }
  470. int LZ4_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
  471. {
  472. #if LZ4HC_HEAPMODE==1
  473. LZ4HC_Data_Structure* statePtr = malloc(sizeof(LZ4HC_Data_Structure));
  474. #else
  475. LZ4HC_Data_Structure state;
  476. LZ4HC_Data_Structure* const statePtr = &state;
  477. #endif
  478. int cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, maxDstSize, compressionLevel);
  479. #if LZ4HC_HEAPMODE==1
  480. free(statePtr);
  481. #endif
  482. return cSize;
  483. }
  484. /**************************************
  485. * Streaming Functions
  486. **************************************/
  487. /* allocation */
  488. LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); }
  489. int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_streamHCPtr); return 0; }
  490. /* initialization */
  491. void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
  492. {
  493. LZ4_STATIC_ASSERT(sizeof(LZ4HC_Data_Structure) <= sizeof(LZ4_streamHC_t)); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
  494. ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base = NULL;
  495. ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel = (unsigned)compressionLevel;
  496. }
  497. int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
  498. {
  499. LZ4HC_Data_Structure* ctxPtr = (LZ4HC_Data_Structure*) LZ4_streamHCPtr;
  500. if (dictSize > 64 KB)
  501. {
  502. dictionary += dictSize - 64 KB;
  503. dictSize = 64 KB;
  504. }
  505. LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
  506. if (dictSize >= 4) LZ4HC_Insert (ctxPtr, (const BYTE*)dictionary +(dictSize-3));
  507. ctxPtr->end = (const BYTE*)dictionary + dictSize;
  508. return dictSize;
  509. }
  510. /* compression */
  511. static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newBlock)
  512. {
  513. if (ctxPtr->end >= ctxPtr->base + 4)
  514. LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
  515. /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
  516. ctxPtr->lowLimit = ctxPtr->dictLimit;
  517. ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
  518. ctxPtr->dictBase = ctxPtr->base;
  519. ctxPtr->base = newBlock - ctxPtr->dictLimit;
  520. ctxPtr->end = newBlock;
  521. ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */
  522. }
  523. static int LZ4_compressHC_continue_generic (LZ4HC_Data_Structure* ctxPtr,
  524. const char* source, char* dest,
  525. int inputSize, int maxOutputSize, limitedOutput_directive limit)
  526. {
  527. /* auto-init if forgotten */
  528. if (ctxPtr->base == NULL)
  529. LZ4HC_init (ctxPtr, (const BYTE*) source);
  530. /* Check overflow */
  531. if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB)
  532. {
  533. size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
  534. if (dictSize > 64 KB) dictSize = 64 KB;
  535. LZ4_loadDictHC((LZ4_streamHC_t*)ctxPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
  536. }
  537. /* Check if blocks follow each other */
  538. if ((const BYTE*)source != ctxPtr->end)
  539. LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source);
  540. /* Check overlapping input/dictionary space */
  541. {
  542. const BYTE* sourceEnd = (const BYTE*) source + inputSize;
  543. const BYTE* dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
  544. const BYTE* dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
  545. if ((sourceEnd > dictBegin) && ((const BYTE*)source < dictEnd))
  546. {
  547. if (sourceEnd > dictEnd) sourceEnd = dictEnd;
  548. ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
  549. if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
  550. }
  551. }
  552. return LZ4HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
  553. }
  554. int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize)
  555. {
  556. if (maxOutputSize < LZ4_compressBound(inputSize))
  557. return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
  558. else
  559. return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, noLimit);
  560. }
  561. /* dictionary saving */
  562. int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
  563. {
  564. LZ4HC_Data_Structure* streamPtr = (LZ4HC_Data_Structure*)LZ4_streamHCPtr;
  565. int prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
  566. if (dictSize > 64 KB) dictSize = 64 KB;
  567. if (dictSize < 4) dictSize = 0;
  568. if (dictSize > prefixSize) dictSize = prefixSize;
  569. memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
  570. {
  571. U32 endIndex = (U32)(streamPtr->end - streamPtr->base);
  572. streamPtr->end = (const BYTE*)safeBuffer + dictSize;
  573. streamPtr->base = streamPtr->end - endIndex;
  574. streamPtr->dictLimit = endIndex - dictSize;
  575. streamPtr->lowLimit = endIndex - dictSize;
  576. if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit;
  577. }
  578. return dictSize;
  579. }
  580. /***********************************
  581. * Deprecated Functions
  582. ***********************************/
  583. /* Deprecated compression functions */
  584. /* These functions are planned to start generate warnings by r131 approximately */
  /* Deprecated : default level (0), with dst capacity taken as LZ4_compressBound(srcSize). */
  int LZ4_compressHC(const char* src, char* dst, int srcSize)
  {
      const int dstCapacity = LZ4_compressBound(srcSize);
      return LZ4_compress_HC (src, dst, srcSize, dstCapacity, 0);
  }
  /* Deprecated : default level (0), output bounded by maxDstSize. */
  int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize)
  {
      const int defaultLevel = 0;
      return LZ4_compress_HC(src, dst, srcSize, maxDstSize, defaultLevel);
  }
  /* Deprecated : level cLevel, with dst capacity taken as LZ4_compressBound(srcSize). */
  int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel)
  {
      const int dstCapacity = LZ4_compressBound(srcSize);
      return LZ4_compress_HC (src, dst, srcSize, dstCapacity, cLevel);
  }
  /* Deprecated : same arguments and result as LZ4_compress_HC(). */
  int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel)
  {
      int const cSize = LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel);
      return cSize;
  }
  /* Deprecated : external state, default level (0), dst capacity taken as LZ4_compressBound(srcSize). */
  int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize)
  {
      const int dstCapacity = LZ4_compressBound(srcSize);
      return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, dstCapacity, 0);
  }
  /* Deprecated : external state, default level (0), output bounded by maxDstSize. */
  int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize)
  {
      const int defaultLevel = 0;
      return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, defaultLevel);
  }
  /* Deprecated : external state, level cLevel, dst capacity taken as LZ4_compressBound(srcSize). */
  int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel)
  {
      const int dstCapacity = LZ4_compressBound(srcSize);
      return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, dstCapacity, cLevel);
  }
  /* Deprecated : same arguments and result as LZ4_compress_HC_extStateHC(). */
  int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel)
  {
      int const cSize = LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel);
      return cSize;
  }
  593. int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
  594. int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }
  595. /* Deprecated streaming functions */
  596. /* These functions currently generate deprecation warnings */
  597. int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }
  598. int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
  599. {
  600. if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */
  601. LZ4HC_init((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer);
  602. ((LZ4HC_Data_Structure*)state)->inputBuffer = (BYTE*)inputBuffer;
  603. return 0;
  604. }
  605. void* LZ4_createHC (char* inputBuffer)
  606. {
  607. void* hc4 = ALLOCATOR(1, sizeof(LZ4HC_Data_Structure));
  608. if (hc4 == NULL) return NULL; /* not enough memory */
  609. LZ4HC_init ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer);
  610. ((LZ4HC_Data_Structure*)hc4)->inputBuffer = (BYTE*)inputBuffer;
  611. return hc4;
  612. }
  /* Deprecated : releases a context with FREEMEM; always returns 0. */
  int LZ4_freeHC (void* LZ4HC_Data)
  {
      FREEMEM(LZ4HC_Data);
      return 0;
  }
  618. int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel)
  619. {
  620. return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, compressionLevel, noLimit);
  621. }
  622. int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
  623. {
  624. return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
  625. }
  626. char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
  627. {
  628. LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
  629. int dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
  630. return (char*)(hc4->inputBuffer + dictSize);
  631. }