blosclz.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. /*********************************************************************
  2. Blosc - Blocked Shuffling and Compression Library
  3. Author: Francesc Alted <francesc@blosc.org>
  4. Creation date: 2009-05-20
  5. See LICENSES/BLOSC.txt for details about copyright and rights to use.
  6. **********************************************************************/
  7. /*********************************************************************
  8. The code in this file is heavily based on FastLZ, a lightning-fast
  9. lossless compression library. See LICENSES/FASTLZ.txt for details.
  10. **********************************************************************/
  11. #include <stdlib.h>
  12. #include <stdio.h>
  13. #include <string.h>
  14. #include "blosclz.h"
  15. #if defined(_WIN32) && !defined(__MINGW32__)
  16. #include <windows.h>
  17. /* stdint.h only available in VS2010 (VC++ 16.0) and newer */
  18. #if defined(_MSC_VER) && _MSC_VER < 1600
  19. #include "win32/stdint-windows.h"
  20. #else
  21. #include <stdint.h>
  22. #endif
  23. /* llabs only available in VS2013 (VC++ 18.0) and newer */
  24. #if defined(_MSC_VER) && _MSC_VER < 1800
  25. #define llabs(v) abs(v)
  26. #endif
  27. #else
  28. #include <stdint.h>
  29. #endif /* _WIN32 */
  30. /*
  31. * Prevent accessing more than 8-bit at once, except on x86 architectures.
  32. */
  33. #if !defined(BLOSCLZ_STRICT_ALIGN)
  34. #define BLOSCLZ_STRICT_ALIGN
  35. #if defined(__i386__) || defined(__386) || defined (__amd64) /* GNU C, Sun Studio */
  36. #undef BLOSCLZ_STRICT_ALIGN
  37. #elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
  38. #undef BLOSCLZ_STRICT_ALIGN
  39. #elif defined(_M_IX86) || defined(_M_X64) /* Intel, MSVC */
  40. #undef BLOSCLZ_STRICT_ALIGN
  41. #elif defined(__386)
  42. #undef BLOSCLZ_STRICT_ALIGN
  43. #elif defined(_X86_) /* MinGW */
  44. #undef BLOSCLZ_STRICT_ALIGN
  45. #elif defined(__I86__) /* Digital Mars */
  46. #undef BLOSCLZ_STRICT_ALIGN
  47. /* Seems like unaligned access in ARM (at least ARMv6) is pretty
  48. expensive, so we are going to always enforce strict aligment in ARM.
  49. If anybody suggest that newer ARMs are better, we can revisit this. */
  50. /* #elif defined(__ARM_FEATURE_UNALIGNED) */ /* ARM, GNU C */
  51. /* #undef BLOSCLZ_STRICT_ALIGN */
  52. #endif
  53. #endif
  54. /*
  55. * Always check for bound when decompressing.
  56. * Generally it is best to leave it defined.
  57. */
  58. #define BLOSCLZ_SAFE
  59. /*
  60. * Give hints to the compiler for branch prediction optimization.
  61. */
  62. #if defined(__GNUC__) && (__GNUC__ > 2)
  63. #define BLOSCLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1))
  64. #define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0))
  65. #else
  66. #define BLOSCLZ_EXPECT_CONDITIONAL(c) (c)
  67. #define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (c)
  68. #endif
  69. /*
  70. * Use inlined functions for supported systems.
  71. */
  72. #if defined(_MSC_VER) && !defined(__cplusplus) /* Visual Studio */
  73. #define inline __inline /* Visual C is not C99, but supports some kind of inline */
  74. #endif
  75. #define MAX_COPY 32
  76. #define MAX_DISTANCE 8191
  77. #define MAX_FARDISTANCE (65535+MAX_DISTANCE-1)
  78. #ifdef BLOSCLZ_STRICT_ALIGN
  79. #define BLOSCLZ_READU16(p) ((p)[0] | (p)[1]<<8)
  80. #else
  81. #define BLOSCLZ_READU16(p) *((const uint16_t*)(p))
  82. #endif
  83. /*
  84. * Fast copy macros
  85. */
  86. #if defined(_WIN32)
  87. #define CPYSIZE 32
  88. #else
  89. #define CPYSIZE 8
  90. #endif
  91. #define MCPY(d,s) { memcpy(d, s, CPYSIZE); d+=CPYSIZE; s+=CPYSIZE; }
  92. #define FASTCOPY(d,s,e) { do { MCPY(d,s) } while (d<e); }
  93. #define SAFECOPY(d,s,e) { while (d<e) { MCPY(d,s) } }
  94. /* Copy optimized for copying in blocks */
  95. #define BLOCK_COPY(op, ref, len, op_limit) \
  96. { int ilen = len % CPYSIZE; \
  97. uint8_t *cpy = op + len; \
  98. if (cpy + CPYSIZE - ilen <= op_limit) { \
  99. FASTCOPY(op, ref, cpy); \
  100. ref -= (op-cpy); op = cpy; \
  101. } \
  102. else { \
  103. cpy -= ilen; \
  104. SAFECOPY(op, ref, cpy); \
  105. ref -= (op-cpy); op = cpy; \
  106. for(; ilen; --ilen) \
  107. *op++ = *ref++; \
  108. } \
  109. }
  110. #define SAFE_COPY(op, ref, len, op_limit) \
  111. if (llabs(op-ref) < CPYSIZE) { \
  112. for(; len; --len) \
  113. *op++ = *ref++; \
  114. } \
  115. else BLOCK_COPY(op, ref, len, op_limit);
  116. /* Copy optimized for GCC 4.8. Seems like long copy loops are optimal. */
  117. #define GCC_SAFE_COPY(op, ref, len, op_limit) \
  118. if ((len > 32) || (llabs(op-ref) < CPYSIZE)) { \
  119. for(; len; --len) \
  120. *op++ = *ref++; \
  121. } \
  122. else BLOCK_COPY(op, ref, len, op_limit);
  123. /* Simple, but pretty effective hash function for 3-byte sequence */
  124. #define HASH_FUNCTION(v, p, l) { \
  125. v = BLOSCLZ_READU16(p); \
  126. v ^= BLOSCLZ_READU16(p + 1) ^ ( v >> (16 - l)); \
  127. v &= (1 << l) - 1; \
  128. }
  129. /* Another version which seems to be a bit more effective than the above,
  130. * but a bit slower. Could be interesting for high opt_level.
  131. */
  132. #define MINMATCH 3
  133. #define HASH_FUNCTION2(v, p, l) { \
  134. v = BLOSCLZ_READU16(p); \
  135. v = (v * 2654435761U) >> ((MINMATCH * 8) - (l + 1)); \
  136. v &= (1 << l) - 1; \
  137. }
  138. #define LITERAL(ip, op, op_limit, anchor, copy) { \
  139. if (BLOSCLZ_UNEXPECT_CONDITIONAL(op+2 > op_limit)) \
  140. goto out; \
  141. *op++ = *anchor++; \
  142. ip = anchor; \
  143. copy++; \
  144. if(BLOSCLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) { \
  145. copy = 0; \
  146. *op++ = MAX_COPY-1; \
  147. } \
  148. continue; \
  149. }
  150. #define IP_BOUNDARY 2
  151. int blosclz_compress(const int opt_level, const void* input, int length,
  152. void* output, int maxout, int accel)
  153. {
  154. uint8_t* ip = (uint8_t*) input;
  155. uint8_t* ibase = (uint8_t*) input;
  156. uint8_t* ip_bound = ip + length - IP_BOUNDARY;
  157. uint8_t* ip_limit = ip + length - 12;
  158. uint8_t* op = (uint8_t*) output;
  159. /* Hash table depends on the opt level. Hash_log cannot be larger than 15. */
  160. /* The parametrization below is made from playing with the bench suite, like:
  161. $ bench/bench blosclz single 4
  162. $ bench/bench blosclz single 4 4194280 12 25
  163. and taking the minimum times on a i5-3380M @ 2.90GHz.
  164. Curiously enough, values >= 14 does not always
  165. get maximum compression, even with large blocksizes. */
  166. int8_t hash_log_[10] = {-1, 11, 11, 11, 12, 13, 13, 13, 13, 13};
  167. uint8_t hash_log = hash_log_[opt_level];
  168. uint16_t hash_size = 1 << hash_log;
  169. uint16_t *htab;
  170. uint8_t* op_limit;
  171. int32_t hval;
  172. uint8_t copy;
  173. double maxlength_[10] = {-1, .1, .15, .2, .3, .45, .6, .75, .9, 1.0};
  174. int32_t maxlength = (int32_t) (length * maxlength_[opt_level]);
  175. if (maxlength > (int32_t) maxout) {
  176. maxlength = (int32_t) maxout;
  177. }
  178. op_limit = op + maxlength;
  179. /* output buffer cannot be less than 66 bytes or we can get into trouble */
  180. if (BLOSCLZ_UNEXPECT_CONDITIONAL(maxlength < 66 || length < 4)) {
  181. return 0;
  182. }
  183. /* prepare the acceleration to be used in condition */
  184. accel = accel < 1 ? 1 : accel;
  185. accel -= 1;
  186. htab = (uint16_t *) calloc(hash_size, sizeof(uint16_t));
  187. /* we start with literal copy */
  188. copy = 2;
  189. *op++ = MAX_COPY-1;
  190. *op++ = *ip++;
  191. *op++ = *ip++;
  192. /* main loop */
  193. while(BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit)) {
  194. const uint8_t* ref;
  195. int32_t distance;
  196. int32_t len = 3; /* minimum match length */
  197. uint8_t* anchor = ip; /* comparison starting-point */
  198. /* check for a run */
  199. if(ip[0] == ip[-1] && BLOSCLZ_READU16(ip-1)==BLOSCLZ_READU16(ip+1)) {
  200. distance = 1;
  201. ip += 3;
  202. ref = anchor - 1 + 3;
  203. goto match;
  204. }
  205. /* find potential match */
  206. HASH_FUNCTION(hval, ip, hash_log);
  207. ref = ibase + htab[hval];
  208. /* calculate distance to the match */
  209. distance = (int32_t)(anchor - ref);
  210. /* update hash table if necessary */
  211. if ((distance & accel) == 0)
  212. htab[hval] = (uint16_t)(anchor - ibase);
  213. /* is this a match? check the first 3 bytes */
  214. if (distance==0 || (distance >= MAX_FARDISTANCE) ||
  215. *ref++ != *ip++ || *ref++!=*ip++ || *ref++!=*ip++)
  216. LITERAL(ip, op, op_limit, anchor, copy);
  217. /* far, needs at least 5-byte match */
  218. if (opt_level >= 5 && distance >= MAX_DISTANCE) {
  219. if (*ip++ != *ref++ || *ip++ != *ref++)
  220. LITERAL(ip, op, op_limit, anchor, copy);
  221. len += 2;
  222. }
  223. match:
  224. /* last matched byte */
  225. ip = anchor + len;
  226. /* distance is biased */
  227. distance--;
  228. if(!distance) {
  229. /* zero distance means a run */
  230. uint8_t x = ip[-1];
  231. int64_t value, value2;
  232. /* Broadcast the value for every byte in a 64-bit register */
  233. memset(&value, x, 8);
  234. /* safe because the outer check against ip limit */
  235. while (ip < (ip_bound - (sizeof(int64_t) - IP_BOUNDARY))) {
  236. #if !defined(BLOSCLZ_STRICT_ALIGN)
  237. value2 = ((int64_t *)ref)[0];
  238. #else
  239. memcpy(&value2, ref, 8);
  240. #endif
  241. if (value != value2) {
  242. /* Find the byte that starts to differ */
  243. while (ip < ip_bound) {
  244. if (*ref++ != x) break; else ip++;
  245. }
  246. break;
  247. }
  248. else {
  249. ip += 8;
  250. ref += 8;
  251. }
  252. }
  253. if (ip > ip_bound) {
  254. long l = (long)(ip - ip_bound);
  255. ip -= l;
  256. ref -= l;
  257. } /* End of optimization */
  258. }
  259. else {
  260. for(;;) {
  261. /* safe because the outer check against ip limit */
  262. while (ip < (ip_bound - (sizeof(int64_t) - IP_BOUNDARY))) {
  263. #if !defined(BLOSCLZ_STRICT_ALIGN)
  264. if (((int64_t *)ref)[0] != ((int64_t *)ip)[0]) {
  265. #endif
  266. /* Find the byte that starts to differ */
  267. while (ip < ip_bound) {
  268. if (*ref++ != *ip++) break;
  269. }
  270. break;
  271. #if !defined(BLOSCLZ_STRICT_ALIGN)
  272. } else { ip += 8; ref += 8; }
  273. #endif
  274. }
  275. /* Last correction before exiting loop */
  276. if (ip > ip_bound) {
  277. int32_t l = (int32_t)(ip - ip_bound);
  278. ip -= l;
  279. ref -= l;
  280. } /* End of optimization */
  281. break;
  282. }
  283. }
  284. /* if we have copied something, adjust the copy count */
  285. if (copy)
  286. /* copy is biased, '0' means 1 byte copy */
  287. *(op-copy-1) = copy-1;
  288. else
  289. /* back, to overwrite the copy count */
  290. op--;
  291. /* reset literal counter */
  292. copy = 0;
  293. /* length is biased, '1' means a match of 3 bytes */
  294. ip -= 3;
  295. len = (int32_t)(ip - anchor);
  296. /* check that we have space enough to encode the match for all the cases */
  297. if (BLOSCLZ_UNEXPECT_CONDITIONAL(op+(len/255)+6 > op_limit)) goto out;
  298. /* encode the match */
  299. if(distance < MAX_DISTANCE) {
  300. if(len < 7) {
  301. *op++ = (len << 5) + (distance >> 8);
  302. *op++ = (distance & 255);
  303. }
  304. else {
  305. *op++ = (uint8_t)((7 << 5) + (distance >> 8));
  306. for(len-=7; len >= 255; len-= 255)
  307. *op++ = 255;
  308. *op++ = len;
  309. *op++ = (distance & 255);
  310. }
  311. }
  312. else {
  313. /* far away, but not yet in the another galaxy... */
  314. if(len < 7) {
  315. distance -= MAX_DISTANCE;
  316. *op++ = (uint8_t)((len << 5) + 31);
  317. *op++ = 255;
  318. *op++ = (uint8_t)(distance >> 8);
  319. *op++ = distance & 255;
  320. }
  321. else {
  322. distance -= MAX_DISTANCE;
  323. *op++ = (7 << 5) + 31;
  324. for(len-=7; len >= 255; len-= 255)
  325. *op++ = 255;
  326. *op++ = len;
  327. *op++ = 255;
  328. *op++ = (uint8_t)(distance >> 8);
  329. *op++ = distance & 255;
  330. }
  331. }
  332. /* update the hash at match boundary */
  333. HASH_FUNCTION(hval, ip, hash_log);
  334. htab[hval] = (uint16_t)(ip++ - ibase);
  335. HASH_FUNCTION(hval, ip, hash_log);
  336. htab[hval] = (uint16_t)(ip++ - ibase);
  337. /* assuming literal copy */
  338. *op++ = MAX_COPY-1;
  339. }
  340. /* left-over as literal copy */
  341. ip_bound++;
  342. while(ip <= ip_bound) {
  343. if (BLOSCLZ_UNEXPECT_CONDITIONAL(op+2 > op_limit)) goto out;
  344. *op++ = *ip++;
  345. copy++;
  346. if(copy == MAX_COPY) {
  347. copy = 0;
  348. *op++ = MAX_COPY-1;
  349. }
  350. }
  351. /* if we have copied something, adjust the copy length */
  352. if(copy)
  353. *(op-copy-1) = copy-1;
  354. else
  355. op--;
  356. /* marker for blosclz */
  357. *(uint8_t*)output |= (1 << 5);
  358. free(htab);
  359. return (int)(op - (uint8_t*)output);
  360. out:
  361. free(htab);
  362. return 0;
  363. }
  364. int blosclz_decompress(const void* input, int length, void* output, int maxout)
  365. {
  366. const uint8_t* ip = (const uint8_t*) input;
  367. const uint8_t* ip_limit = ip + length;
  368. uint8_t* op = (uint8_t*) output;
  369. uint8_t* op_limit = op + maxout;
  370. int32_t ctrl = (*ip++) & 31;
  371. int32_t loop = 1;
  372. do {
  373. uint8_t* ref = op;
  374. int32_t len = ctrl >> 5;
  375. int32_t ofs = (ctrl & 31) << 8;
  376. if(ctrl >= 32) {
  377. uint8_t code;
  378. len--;
  379. ref -= ofs;
  380. if (len == 7-1)
  381. do {
  382. code = *ip++;
  383. len += code;
  384. } while (code==255);
  385. code = *ip++;
  386. ref -= code;
  387. /* match from 16-bit distance */
  388. if(BLOSCLZ_UNEXPECT_CONDITIONAL(code==255))
  389. if(BLOSCLZ_EXPECT_CONDITIONAL(ofs==(31 << 8))) {
  390. ofs = (*ip++) << 8;
  391. ofs += *ip++;
  392. ref = op - ofs - MAX_DISTANCE;
  393. }
  394. #ifdef BLOSCLZ_SAFE
  395. if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit)) {
  396. return 0;
  397. }
  398. if (BLOSCLZ_UNEXPECT_CONDITIONAL(ref-1 < (uint8_t *)output)) {
  399. return 0;
  400. }
  401. #endif
  402. if(BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit))
  403. ctrl = *ip++;
  404. else
  405. loop = 0;
  406. if(ref == op) {
  407. /* optimize copy for a run */
  408. uint8_t b = ref[-1];
  409. memset(op, b, len+3);
  410. op += len+3;
  411. }
  412. else {
  413. /* copy from reference */
  414. ref--;
  415. len += 3;
  416. #if !defined(_WIN32) && ((defined(__GNUC__) || defined(__INTEL_COMPILER) || !defined(__clang__)))
  417. GCC_SAFE_COPY(op, ref, len, op_limit);
  418. #else
  419. SAFE_COPY(op, ref, len, op_limit);
  420. #endif
  421. }
  422. }
  423. else {
  424. ctrl++;
  425. #ifdef BLOSCLZ_SAFE
  426. if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit)) {
  427. return 0;
  428. }
  429. if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit)) {
  430. return 0;
  431. }
  432. #endif
  433. BLOCK_COPY(op, ip, ctrl, op_limit);
  434. loop = (int32_t)BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit);
  435. if(loop)
  436. ctrl = *ip++;
  437. }
  438. } while(BLOSCLZ_EXPECT_CONDITIONAL(loop));
  439. return (int)(op - (uint8_t*)output);
  440. }