123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528 |
- /*********************************************************************
- Blosc - Blocked Shuffling and Compression Library
- Author: Francesc Alted <francesc@blosc.org>
- Creation date: 2009-05-20
- See LICENSES/BLOSC.txt for details about copyright and rights to use.
- **********************************************************************/
- /*********************************************************************
- The code in this file is heavily based on FastLZ, a lightning-fast
- lossless compression library. See LICENSES/FASTLZ.txt for details.
- **********************************************************************/
- #include <stdio.h>
- #include <stdlib.h>
- #include "blosclz.h"
- #include "fastcopy.h"
- #include "blosc-common.h"
- /*
- * Check for bound when decompressing.
- * It is a good idea to define this while developing.
- */
- #undef BLOSCLZ_SAFE
- /*
- * Give hints to the compiler for branch prediction optimization.
- */
- #if defined(__GNUC__) && (__GNUC__ > 2)
- #define BLOSCLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1))
- #define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0))
- #else
- #define BLOSCLZ_EXPECT_CONDITIONAL(c) (c)
- #define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (c)
- #endif
- /*
- * Use inlined functions for supported systems.
- */
- #if defined(_MSC_VER) && !defined(__cplusplus) /* Visual Studio */
- #define inline __inline /* Visual C is not C99, but supports some kind of inline */
- #endif
- #define MAX_COPY 32
- #define MAX_DISTANCE 8191
- #define MAX_FARDISTANCE (65535 + MAX_DISTANCE - 1)
- #ifdef BLOSC_STRICT_ALIGN
- #define BLOSCLZ_READU16(p) ((p)[0] | (p)[1]<<8)
- #else
- #define BLOSCLZ_READU16(p) *((const uint16_t*)(p))
- #endif
- /* Simple, but pretty effective hash function for 3-byte sequence */
- #define HASH_FUNCTION(v, p, l) { \
- v = BLOSCLZ_READU16(p); \
- v ^= BLOSCLZ_READU16(p + 1) ^ ( v >> (16 - l)); \
- v &= (1 << l) - 1; \
- }
- #define LITERAL(ip, op, op_limit, anchor, copy) { \
- if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + 2 > op_limit)) \
- goto out; \
- *op++ = *anchor++; \
- ip = anchor; \
- copy++; \
- if(BLOSCLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) { \
- copy = 0; \
- *op++ = MAX_COPY-1; \
- } \
- continue; \
- }
- #define IP_BOUNDARY 2
- static inline uint8_t *get_run(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
- uint8_t x = ip[-1];
- int64_t value, value2;
- /* Broadcast the value for every byte in a 64-bit register */
- memset(&value, x, 8);
- /* safe because the outer check against ip limit */
- while (ip < (ip_bound - sizeof(int64_t))) {
- #if defined(BLOSC_STRICT_ALIGN)
- memcpy(&value2, ref, 8);
- #else
- value2 = ((int64_t*)ref)[0];
- #endif
- if (value != value2) {
- /* Find the byte that starts to differ */
- while (*ref++ == x) ip++;
- return ip;
- }
- else {
- ip += 8;
- ref += 8;
- }
- }
- /* Look into the remainder */
- while ((ip < ip_bound) && (*ref++ == x)) ip++;
- return ip;
- }
- #ifdef __SSE2__
- static inline uint8_t *get_run_16(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
- uint8_t x = ip[-1];
- __m128i value, value2, cmp;
- /* Broadcast the value for every byte in a 128-bit register */
- memset(&value, x, sizeof(__m128i));
- /* safe because the outer check against ip limit */
- while (ip < (ip_bound - sizeof(__m128i))) {
- value2 = _mm_loadu_si128((__m128i *)ref);
- cmp = _mm_cmpeq_epi32(value, value2);
- if (_mm_movemask_epi8(cmp) != 0xFFFF) {
- /* Find the byte that starts to differ */
- while (*ref++ == x) ip++;
- return ip;
- }
- else {
- ip += sizeof(__m128i);
- ref += sizeof(__m128i);
- }
- }
- /* Look into the remainder */
- while ((ip < ip_bound) && (*ref++ == x)) ip++;
- return ip;
- }
- #endif
- #ifdef __AVX2__
- static inline uint8_t *get_run_32(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
- uint8_t x = ip[-1];
- __m256i value, value2, cmp;
- /* Broadcast the value for every byte in a 256-bit register */
- memset(&value, x, sizeof(__m256i));
- /* safe because the outer check against ip limit */
- while (ip < (ip_bound - (sizeof(__m256i)))) {
- value2 = _mm256_loadu_si256((__m256i *)ref);
- cmp = _mm256_cmpeq_epi64(value, value2);
- if (_mm256_movemask_epi8(cmp) != 0xFFFFFFFF) {
- /* Find the byte that starts to differ */
- while (*ref++ == x) ip++;
- return ip;
- }
- else {
- ip += sizeof(__m256i);
- ref += sizeof(__m256i);
- }
- }
- /* Look into the remainder */
- while ((ip < ip_bound) && (*ref++ == x)) ip++;
- return ip;
- }
- #endif
- /* Find the byte that starts to differ */
- uint8_t *get_match(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
- #if !defined(BLOSC_STRICT_ALIGN)
- while (ip < (ip_bound - sizeof(int64_t))) {
- if (((int64_t*)ref)[0] != ((int64_t*)ip)[0]) {
- /* Find the byte that starts to differ */
- while (*ref++ == *ip++) {}
- return ip;
- }
- else {
- ip += sizeof(int64_t);
- ref += sizeof(int64_t);
- }
- }
- #endif
- /* Look into the remainder */
- while ((ip < ip_bound) && (*ref++ == *ip++)) {}
- return ip;
- }
- #if defined(__SSE2__)
- uint8_t *get_match_16(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
- __m128i value, value2, cmp;
- while (ip < (ip_bound - sizeof(__m128i))) {
- value = _mm_loadu_si128((__m128i *) ip);
- value2 = _mm_loadu_si128((__m128i *) ref);
- cmp = _mm_cmpeq_epi32(value, value2);
- if (_mm_movemask_epi8(cmp) != 0xFFFF) {
- /* Find the byte that starts to differ */
- while (*ref++ == *ip++) {}
- return ip;
- }
- else {
- ip += sizeof(__m128i);
- ref += sizeof(__m128i);
- }
- }
- /* Look into the remainder */
- while ((ip < ip_bound) && (*ref++ == *ip++)) {}
- return ip;
- }
- #endif
- #if defined(__AVX2__)
- uint8_t *get_match_32(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
- __m256i value, value2, cmp;
- while (ip < (ip_bound - sizeof(__m256i))) {
- value = _mm256_loadu_si256((__m256i *) ip);
- value2 = _mm256_loadu_si256((__m256i *)ref);
- cmp = _mm256_cmpeq_epi64(value, value2);
- if (_mm256_movemask_epi8(cmp) != 0xFFFFFFFF) {
- /* Find the byte that starts to differ */
- while (*ref++ == *ip++) {}
- return ip;
- }
- else {
- ip += sizeof(__m256i);
- ref += sizeof(__m256i);
- }
- }
- /* Look into the remainder */
- while ((ip < ip_bound) && (*ref++ == *ip++)) {}
- return ip;
- }
- #endif
- int blosclz_compress(const int opt_level, const void* input, int length,
- void* output, int maxout) {
- uint8_t* ip = (uint8_t*)input;
- uint8_t* ibase = (uint8_t*)input;
- uint8_t* ip_bound = ip + length - IP_BOUNDARY;
- uint8_t* ip_limit = ip + length - 12;
- uint8_t* op = (uint8_t*)output;
- /* Hash table depends on the opt level. Hash_log cannot be larger than 15. */
- /* The parametrization below is made from playing with the bench suite, like:
- $ bench/bench blosclz single 4
- $ bench/bench blosclz single 4 4194280 12 25
- and taking the minimum times on a i5-3380M @ 2.90GHz.
- Curiously enough, values >= 14 does not always
- get maximum compression, even with large blocksizes. */
- int8_t hash_log_[10] = {-1, 15, 15, 15, 15, 15, 15, 15, 15, 15};
- uint8_t hash_log = hash_log_[opt_level];
- uint16_t hash_size = 1 << hash_log;
- uint16_t* htab;
- uint8_t* op_limit;
- int32_t hval;
- uint8_t copy;
- double maxlength_[10] = {-1, .1, .3, .5, .6, .8, .9, .95, 1.0, 1.0};
- int32_t maxlength = (int32_t)(length * maxlength_[opt_level]);
- if (maxlength > (int32_t)maxout) {
- maxlength = (int32_t)maxout;
- }
- op_limit = op + maxlength;
- /* output buffer cannot be less than 66 bytes or we can get into trouble */
- if (BLOSCLZ_UNEXPECT_CONDITIONAL(maxout < 66 || length < 4)) {
- return 0;
- }
- htab = (uint16_t*)calloc(hash_size, sizeof(uint16_t));
- /* we start with literal copy */
- copy = 2;
- *op++ = MAX_COPY - 1;
- *op++ = *ip++;
- *op++ = *ip++;
- /* main loop */
- while (BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit)) {
- const uint8_t* ref;
- int32_t distance;
- int32_t len = 3; /* minimum match length */
- uint8_t* anchor = ip; /* comparison starting-point */
- /* check for a run */
- if (ip[0] == ip[-1] && BLOSCLZ_READU16(ip - 1) == BLOSCLZ_READU16(ip + 1)) {
- distance = 1;
- ip += 3;
- ref = anchor - 1 + 3;
- goto match;
- }
- /* find potential match */
- HASH_FUNCTION(hval, ip, hash_log);
- ref = ibase + htab[hval];
- /* calculate distance to the match */
- distance = (int32_t)(anchor - ref);
- /* update hash table if necessary */
- /* not exactly sure why 0x1F works best, but experiments apparently say so */
- if ((distance & 0x1F) == 0)
- htab[hval] = (uint16_t)(anchor - ibase);
- /* is this a match? check the first 3 bytes */
- if (distance == 0 || (distance >= MAX_FARDISTANCE) ||
- *ref++ != *ip++ || *ref++ != *ip++ || *ref++ != *ip++) {
- LITERAL(ip, op, op_limit, anchor, copy);
- }
- /* far, needs at least 5-byte match */
- if (opt_level >= 5 && distance >= MAX_DISTANCE) {
- if (*ip++ != *ref++ || *ip++ != *ref++) LITERAL(ip, op, op_limit, anchor, copy);
- len += 2;
- }
- match:
- /* last matched byte */
- ip = anchor + len;
- /* distance is biased */
- distance--;
- if (!distance) {
- /* zero distance means a run */
- #if defined(__AVX2__)
- ip = get_run_32(ip, ip_bound, ref);
- #elif defined(__SSE2__)
- ip = get_run_16(ip, ip_bound, ref);
- #else
- ip = get_run(ip, ip_bound, ref);
- #endif
- }
- else {
- #if defined(__AVX2__)
- /* Experiments show that the SSE2 version is a bit faster, even on AVX2 processors */
- ip = get_match_16(ip, ip_bound + IP_BOUNDARY, ref);
- #elif defined(__SSE2__)
- ip = get_match_16(ip, ip_bound + IP_BOUNDARY, ref);
- #else
- ip = get_match(ip, ip_bound + IP_BOUNDARY, ref);
- #endif
- }
- /* if we have copied something, adjust the copy count */
- if (copy)
- /* copy is biased, '0' means 1 byte copy */
- *(op - copy - 1) = copy - 1;
- else
- /* back, to overwrite the copy count */
- op--;
- /* reset literal counter */
- copy = 0;
- /* length is biased, '1' means a match of 3 bytes */
- ip -= 3;
- len = (int32_t)(ip - anchor);
- /* check that we have space enough to encode the match for all the cases */
- if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + (len / 255) + 6 > op_limit)) goto out;
- /* encode the match */
- if (distance < MAX_DISTANCE) {
- if (len < 7) {
- *op++ = (len << 5) + (distance >> 8);
- *op++ = (distance & 255);
- }
- else {
- *op++ = (uint8_t)((7 << 5) + (distance >> 8));
- for (len -= 7; len >= 255; len -= 255)
- *op++ = 255;
- *op++ = len;
- *op++ = (distance & 255);
- }
- }
- else {
- /* far away, but not yet in the another galaxy... */
- if (len < 7) {
- distance -= MAX_DISTANCE;
- *op++ = (uint8_t)((len << 5) + 31);
- *op++ = 255;
- *op++ = (uint8_t)(distance >> 8);
- *op++ = distance & 255;
- }
- else {
- distance -= MAX_DISTANCE;
- *op++ = (7 << 5) + 31;
- for (len -= 7; len >= 255; len -= 255)
- *op++ = 255;
- *op++ = len;
- *op++ = 255;
- *op++ = (uint8_t)(distance >> 8);
- *op++ = distance & 255;
- }
- }
- /* update the hash at match boundary */
- HASH_FUNCTION(hval, ip, hash_log);
- htab[hval] = (uint16_t)(ip++ - ibase);
- HASH_FUNCTION(hval, ip, hash_log);
- htab[hval] = (uint16_t)(ip++ - ibase);
- /* assuming literal copy */
- *op++ = MAX_COPY - 1;
- }
- /* left-over as literal copy */
- ip_bound++;
- while (ip <= ip_bound) {
- if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + 2 > op_limit)) goto out;
- *op++ = *ip++;
- copy++;
- if (copy == MAX_COPY) {
- copy = 0;
- *op++ = MAX_COPY - 1;
- }
- }
- /* if we have copied something, adjust the copy length */
- if (copy)
- *(op - copy - 1) = copy - 1;
- else
- op--;
- /* marker for blosclz */
- *(uint8_t*)output |= (1 << 5);
- free(htab);
- return (int)(op - (uint8_t*)output);
- out:
- free(htab);
- return 0;
- }
- int blosclz_decompress(const void* input, int length, void* output, int maxout) {
- const uint8_t* ip = (const uint8_t*)input;
- const uint8_t* ip_limit = ip + length;
- uint8_t* op = (uint8_t*)output;
- int32_t ctrl = (*ip++) & 31;
- int32_t loop = 1;
- #ifdef BLOSCLZ_SAFE
- uint8_t* op_limit = op + maxout;
- #endif
- do {
- uint8_t* ref = op;
- int32_t len = ctrl >> 5;
- int32_t ofs = (ctrl & 31) << 8;
- if (ctrl >= 32) {
- uint8_t code;
- len--;
- ref -= ofs;
- if (len == 7 - 1)
- do {
- code = *ip++;
- len += code;
- } while (code == 255);
- code = *ip++;
- ref -= code;
- /* match from 16-bit distance */
- if (BLOSCLZ_UNEXPECT_CONDITIONAL(code == 255)) if (BLOSCLZ_EXPECT_CONDITIONAL(ofs == (31 << 8))) {
- ofs = (*ip++) << 8;
- ofs += *ip++;
- ref = op - ofs - MAX_DISTANCE;
- }
- #ifdef BLOSCLZ_SAFE
- if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit)) {
- return 0;
- }
- if (BLOSCLZ_UNEXPECT_CONDITIONAL(ref - 1 < (uint8_t*)output)) {
- return 0;
- }
- #endif
- if (BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit))
- ctrl = *ip++;
- else
- loop = 0;
- if (ref == op) {
- /* optimized copy for a run */
- uint8_t b = ref[-1];
- memset(op, b, len + 3);
- op += len + 3;
- }
- else {
- /* copy from reference */
- ref--;
- len += 3;
- op = safecopy(op, ref, (unsigned) len);
- }
- }
- else {
- ctrl++;
- #ifdef BLOSCLZ_SAFE
- if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit)) {
- return 0;
- }
- if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit)) {
- return 0;
- }
- #endif
- // memcpy(op, ip, ctrl); op += ctrl; ip += ctrl;
- // On GCC-6, fastcopy this is still faster than plain memcpy
- // However, using recent CLANG/LLVM 9.0, there is almost no difference
- // in performance.
- op = fastcopy(op, ip, (unsigned) ctrl);
- ip += ctrl;
- loop = (int32_t)BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit);
- if (loop)
- ctrl = *ip++;
- }
- } while (BLOSCLZ_EXPECT_CONDITIONAL(loop));
- return (int)(op - (uint8_t*)output);
- }
|