blosc.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. /*********************************************************************
  2. Blosc - Blocked Shuffling and Compression Library
  3. Author: Francesc Alted <francesc@blosc.org>
  4. See LICENSES/BLOSC.txt for details about copyright and rights to use.
  5. **********************************************************************/
  6. #ifndef BLOSC_H
  7. #define BLOSC_H
  8. #include <limits.h>
  9. #include <stdlib.h>
  10. #include "blosc-export.h"
  11. #ifdef __cplusplus
  12. extern "C" {
  13. #endif
  14. /* Version numbers */
  15. #define BLOSC_VERSION_MAJOR 1 /* for major interface/format changes */
  16. #define BLOSC_VERSION_MINOR 9 /* for minor interface/format changes */
  17. #define BLOSC_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
  18. #define BLOSC_VERSION_STRING "1.9.3" /* string version. Sync with above! */
  19. #define BLOSC_VERSION_REVISION "$Rev$" /* revision version */
  20. #define BLOSC_VERSION_DATE "$Date:: 2016-07-06 #$" /* date version */
  21. #define BLOSCLZ_VERSION_STRING "1.0.5" /* the internal compressor version */
  22. /* The *_FORMAT symbols should be just 1-byte long */
  23. #define BLOSC_VERSION_FORMAT 2 /* Blosc format version, starting at 1 */
  24. /* Minimum header length */
  25. #define BLOSC_MIN_HEADER_LENGTH 16
  26. /* The maximum overhead during compression in bytes. This is equal to
  27. BLOSC_MIN_HEADER_LENGTH now, but can be higher in future
  28. implementations */
  29. #define BLOSC_MAX_OVERHEAD BLOSC_MIN_HEADER_LENGTH
  30. /* Maximum source buffer size to be compressed */
  31. #define BLOSC_MAX_BUFFERSIZE (INT_MAX - BLOSC_MAX_OVERHEAD)
  32. /* Maximum typesize before considering source buffer as a stream of bytes */
  33. #define BLOSC_MAX_TYPESIZE 255 /* Cannot be larger than 255 */
  34. /* The maximum number of threads (for some static arrays) */
  35. #define BLOSC_MAX_THREADS 256
  36. /* Codes for shuffling (see blosc_compress) */
  37. #define BLOSC_NOSHUFFLE 0 /* no shuffle */
  38. #define BLOSC_SHUFFLE 1 /* byte-wise shuffle */
  39. #define BLOSC_BITSHUFFLE 2 /* bit-wise shuffle */
  40. /* Codes for internal flags (see blosc_cbuffer_metainfo) */
  41. #define BLOSC_DOSHUFFLE 0x1 /* byte-wise shuffle */
  42. #define BLOSC_MEMCPYED 0x2 /* plain copy */
  43. #define BLOSC_DOBITSHUFFLE 0x4 /* bit-wise shuffle */
  44. /* Codes for the different compressors shipped with Blosc */
  45. #define BLOSC_BLOSCLZ 0
  46. #define BLOSC_LZ4 1
  47. #define BLOSC_LZ4HC 2
  48. #define BLOSC_SNAPPY 3
  49. #define BLOSC_ZLIB 4
  50. /* Names for the different compressors shipped with Blosc */
  51. #define BLOSC_BLOSCLZ_COMPNAME "blosclz"
  52. #define BLOSC_LZ4_COMPNAME "lz4"
  53. #define BLOSC_LZ4HC_COMPNAME "lz4hc"
  54. #define BLOSC_SNAPPY_COMPNAME "snappy"
  55. #define BLOSC_ZLIB_COMPNAME "zlib"
  56. /* Codes for compression libraries shipped with Blosc (code must be < 8) */
  57. #define BLOSC_BLOSCLZ_LIB 0
  58. #define BLOSC_LZ4_LIB 1
  59. #define BLOSC_SNAPPY_LIB 2
  60. #define BLOSC_ZLIB_LIB 3
  61. /* Names for the different compression libraries shipped with Blosc */
  62. #define BLOSC_BLOSCLZ_LIBNAME "BloscLZ"
  63. #define BLOSC_LZ4_LIBNAME "LZ4"
  64. #define BLOSC_SNAPPY_LIBNAME "Snappy"
  65. #define BLOSC_ZLIB_LIBNAME "Zlib"
  66. /* The codes for compressor formats shipped with Blosc */
  67. #define BLOSC_BLOSCLZ_FORMAT BLOSC_BLOSCLZ_LIB
  68. #define BLOSC_LZ4_FORMAT BLOSC_LZ4_LIB
  69. /* LZ4HC and LZ4 share the same format */
  70. #define BLOSC_LZ4HC_FORMAT BLOSC_LZ4_LIB
  71. #define BLOSC_SNAPPY_FORMAT BLOSC_SNAPPY_LIB
  72. #define BLOSC_ZLIB_FORMAT BLOSC_ZLIB_LIB
  73. /* The version formats for compressors shipped with Blosc */
  74. /* All versions here start at 1 */
  75. #define BLOSC_BLOSCLZ_VERSION_FORMAT 1
  76. #define BLOSC_LZ4_VERSION_FORMAT 1
  77. #define BLOSC_LZ4HC_VERSION_FORMAT 1 /* LZ4HC and LZ4 share the same format */
  78. #define BLOSC_SNAPPY_VERSION_FORMAT 1
  79. #define BLOSC_ZLIB_VERSION_FORMAT 1
  80. /**
  81. Initialize the Blosc library environment.
  82. You must call this before any other Blosc call, unless you want
  83. Blosc to be used simultaneously in a multi-threaded environment, in
  84. which case you should *exclusively* use the
  85. blosc_compress_ctx()/blosc_decompress_ctx() pair (see below).
  86. */
  87. BLOSC_EXPORT void blosc_init(void);
  88. /**
  89. Destroy the Blosc library environment.
  90. You must call this after you are done with all the Blosc calls,
  91. unless you have not used blosc_init() before (see blosc_init()
  92. above).
  93. */
  94. BLOSC_EXPORT void blosc_destroy(void);
  95. /**
  96. Compress a block of data in the `src` buffer and return the size of
  97. the compressed block. The size of `src` buffer is specified by
  98. `nbytes`. There is no minimum for `src` buffer size (`nbytes`).
  99. `clevel` is the desired compression level and must be a number
  100. between 0 (no compression) and 9 (maximum compression).
  101. `doshuffle` specifies whether the shuffle compression preconditioner
  102. should be applied or not. BLOSC_NOSHUFFLE means not applying it,
  103. BLOSC_SHUFFLE means applying it at a byte level and BLOSC_BITSHUFFLE
  104. at a bit level (slower but may achieve better entropy alignment).
  105. `typesize` is the number of bytes for the atomic type in binary
  106. `src` buffer. This is mainly useful for the shuffle preconditioner.
  107. For implementation reasons, only 1 < typesize < 256 will allow the
  108. shuffle filter to work. When typesize is not in this range, shuffle
  109. will be silently disabled.
  110. The `dest` buffer must have at least the size of `destsize`. Blosc
  111. guarantees that if you set `destsize` to, at least,
  112. (`nbytes`+BLOSC_MAX_OVERHEAD), the compression will always succeed.
  113. The `src` buffer and the `dest` buffer cannot overlap.
  114. Compression is memory safe and guaranteed not to write to the `dest`
  115. buffer more than what is specified in `destsize`.
  116. If `src` buffer cannot be compressed into `destsize`, the return
  117. value is zero and you should discard the contents of the `dest`
  118. buffer.
  119. A negative return value means that an internal error happened. This
  120. should never happen. If you see this, please report it back
  121. together with the buffer data causing this and compression settings.
  122. Environment variables
  123. ---------------------
  124. blosc_compress() honors different environment variables to control
  125. internal parameters without the need of doing that programmatically.
  126. Here are the ones supported:
  127. BLOSC_CLEVEL=(INTEGER): This will override the `clevel` parameter
  128. before the compression process starts.
  129. BLOSC_SHUFFLE=[NOSHUFFLE | SHUFFLE | BITSHUFFLE]: This will
  130. override the `doshuffle` parameter before the compression process
  131. starts.
  132. BLOSC_TYPESIZE=(INTEGER): This will override the `typesize`
  133. parameter before the compression process starts.
  134. BLOSC_COMPRESSOR=[BLOSCLZ | LZ4 | LZ4HC | SNAPPY | ZLIB]: This will
  135. call blosc_set_compressor(BLOSC_COMPRESSOR) before the compression
  136. process starts.
  137. BLOSC_NTHREADS=(INTEGER): This will call
  138. blosc_set_nthreads(BLOSC_NTHREADS) before the compression process
  139. starts.
  140. BLOSC_BLOCKSIZE=(INTEGER): This will call
  141. blosc_set_blocksize(BLOSC_BLOCKSIZE) before the compression process
  142. starts. *NOTE:* The blocksize is a critical parameter with
  143. important restrictions on the allowed values, so use this with care.
  144. BLOSC_NOLOCK=(ANY VALUE): This will call blosc_compress_ctx() under
  145. the hood, with the `compressor`, `blocksize` and
  146. `numinternalthreads` parameters set to the same as the last calls to
  147. blosc_set_compressor(), blosc_set_blocksize() and
  148. blosc_set_nthreads(). BLOSC_CLEVEL, BLOSC_SHUFFLE, BLOSC_TYPESIZE
  149. environment vars will also be honored.
  150. */
  151. BLOSC_EXPORT int blosc_compress(int clevel, int doshuffle, size_t typesize,
  152. size_t nbytes, const void *src, void *dest,
  153. size_t destsize);
  154. /**
  155. Context interface to blosc compression. This does not require a call
  156. to blosc_init() and can be called from multithreaded applications
  157. without the global lock being used, allowing Blosc to be executed
  158. simultaneously in those scenarios.
  159. It uses the same parameters as the blosc_compress() function plus:
  160. `compressor`: the string representing the type of compressor to use.
  161. `blocksize`: the requested size of the compressed blocks. If 0, an
  162. automatic blocksize will be used.
  163. `numinternalthreads`: the number of threads to use internally.
  164. A negative return value means that an internal error happened. This
  165. should never happen. If you see this, please report it back
  166. together with the buffer data causing this and compression settings.
  167. */
  168. BLOSC_EXPORT int blosc_compress_ctx(int clevel, int doshuffle, size_t typesize,
  169. size_t nbytes, const void* src, void* dest,
  170. size_t destsize, const char* compressor,
  171. size_t blocksize, int numinternalthreads);
  172. /**
  173. Decompress a block of compressed data in `src`, put the result in
  174. `dest` and return the size of the decompressed block.
  175. The `src` buffer and the `dest` buffer cannot overlap.
  176. Decompression is memory safe and guaranteed not to write to the `dest`
  177. buffer more than what is specified in `destsize`.
  178. If an error occurs, e.g. the compressed data is corrupted or the
  179. output buffer is not large enough, then 0 (zero) or a negative value
  180. will be returned instead.
  181. Environment variables
  182. ---------------------
  183. blosc_decompress() honors different environment variables to control
  184. internal parameters without the need of doing that programmatically.
  185. Here are the ones supported:
  186. BLOSC_NTHREADS=(INTEGER): This will call
  187. blosc_set_nthreads(BLOSC_NTHREADS) before the proper decompression
  188. process starts.
  189. BLOSC_NOLOCK=(ANY VALUE): This will call blosc_decompress_ctx()
  190. under the hood, with the `numinternalthreads` parameter set to the
  191. same value as the last call to blosc_set_nthreads().
  192. */
  193. BLOSC_EXPORT int blosc_decompress(const void *src, void *dest, size_t destsize);
  194. /**
  195. Context interface to blosc decompression. This does not require a
  196. call to blosc_init() and can be called from multithreaded
  197. applications without the global lock being used, allowing Blosc
  198. to be executed simultaneously in those scenarios.
  199. It uses the same parameters as the blosc_decompress() function plus:
  200. `numinternalthreads`: number of threads to use internally.
  201. Decompression is memory safe and guaranteed not to write to the `dest`
  202. buffer more than what is specified in `destsize`.
  203. If an error occurs, e.g. the compressed data is corrupted or the
  204. output buffer is not large enough, then 0 (zero) or a negative value
  205. will be returned instead.
  206. */
  207. BLOSC_EXPORT int blosc_decompress_ctx(const void *src, void *dest,
  208. size_t destsize, int numinternalthreads);
  209. /**
  210. Get `nitems` (of typesize size) from the `src` buffer, starting at `start`.
  211. The items are returned in the `dest` buffer, which must have enough
  212. space for storing all items.
  213. Returns the number of bytes copied to `dest` or a negative value if
  214. some error happens.
  215. */
  216. BLOSC_EXPORT int blosc_getitem(const void *src, int start, int nitems, void *dest);
  217. /**
  218. Return the number of threads currently used for
  219. compression/decompression.
  220. */
  221. BLOSC_EXPORT int blosc_get_nthreads(void);
  222. /**
  223. Initialize a pool of threads for compression/decompression. If
  224. `nthreads` is 1, then the serial version is chosen and any
  225. previously existing pool is ended. If this is not called, `nthreads`
  226. is set to 1 internally.
  227. Returns the previous number of threads.
  228. */
  229. BLOSC_EXPORT int blosc_set_nthreads(int nthreads);
  230. /**
  231. Return the compressor that is currently used for compression.
  232. */
  233. BLOSC_EXPORT char* blosc_get_compressor(void);
  234. /**
  235. Select the compressor to be used. The supported ones are "blosclz",
  236. "lz4", "lz4hc", "snappy" and "zlib". If this function is not
  237. called, then "blosclz" will be used.
  238. In case the compressor is not recognized, or there is no support
  239. for it in this build, it returns -1. Else it returns the code for
  240. the compressor (>=0).
  241. */
  242. BLOSC_EXPORT int blosc_set_compressor(const char* compname);
  243. /**
  244. Get the `compname` associated with the `compcode`.
  245. If the compressor code is not recognized, or there is no support
  246. for it in this build, -1 is returned. Else, the compressor code is
  247. returned.
  248. */
  249. BLOSC_EXPORT int blosc_compcode_to_compname(int compcode, char **compname);
  250. /**
  251. Return the compressor code associated with the compressor name.
  252. If the compressor name is not recognized, or there is no support
  253. for it in this build, -1 is returned instead.
  254. */
  255. BLOSC_EXPORT int blosc_compname_to_compcode(const char *compname);
  256. /**
  257. Get a list of compressors supported in the current build. The
  258. returned value is a string with a concatenation of "blosclz", "lz4",
  259. "lz4hc", "snappy" or "zlib" separated by commas, depending on which
  260. ones are present in the build.
  261. This function does not leak, so you must not free() the returned
  262. list.
  263. This function should always succeed.
  264. */
  265. BLOSC_EXPORT char* blosc_list_compressors(void);
  266. /**
  267. Return the version of Blosc in string format.
  268. Useful for dynamic libraries.
  269. */
  270. BLOSC_EXPORT char* blosc_get_version_string(void);
  271. /**
  272. Get info from compression libraries included in the current build.
  273. In `compname` you pass the compressor name that you want info from.
  274. In `complib` and `version` you get, as output, pointers to the
  275. compression library name and to its version (if available),
  276. respectively, both in string format.
  277. Ownership of both strings passes to the caller: after using the
  278. name and version, you should free() them so as to avoid
  279. leaks.
  280. If the compressor is supported, it returns the code for the library
  281. (>=0). If it is not supported, this function returns -1.
  282. */
  283. BLOSC_EXPORT int blosc_get_complib_info(char *compname, char **complib, char **version);
  284. /**
  285. Free possible memory temporaries and thread resources. Use this
  286. when you are not going to use Blosc for a long while. Returns 0 on
  287. success, or a negative number if there were problems releasing the
  288. resources.
  289. */
  290. BLOSC_EXPORT int blosc_free_resources(void);
  291. /**
  292. Return information about a compressed buffer, namely the number of
  293. uncompressed bytes (`nbytes`) and compressed bytes (`cbytes`). It also
  294. returns the `blocksize` (which is used internally for doing the
  295. compression by blocks).
  296. You only need to pass the first BLOSC_MIN_HEADER_LENGTH bytes of a
  297. compressed buffer for this call to work.
  298. This function should always succeed.
  299. */
  300. BLOSC_EXPORT void blosc_cbuffer_sizes(const void *cbuffer, size_t *nbytes,
  301. size_t *cbytes, size_t *blocksize);
  302. /**
  303. Return information about a compressed buffer, namely the type size
  304. (`typesize`), as well as some internal `flags`.
  305. The `flags` is a set of bits, where the currently used ones are:
  306. * bit 0: whether the byte-wise shuffle filter has been applied or not
  307. * bit 1: whether the internal buffer is a pure memcpy or not
  308. * bit 2: whether the bit-wise shuffle filter has been applied or not
  309. You can use the `BLOSC_DOSHUFFLE`, `BLOSC_MEMCPYED` and
  310. `BLOSC_DOBITSHUFFLE` symbols for extracting the interesting bits (e.g.
  311. ``flags & BLOSC_DOSHUFFLE`` says whether the buffer is byte-shuffled).
  312. This function should always succeed.
  313. */
  314. BLOSC_EXPORT void blosc_cbuffer_metainfo(const void *cbuffer, size_t *typesize,
  315. int *flags);
  316. /**
  317. Return information about a compressed buffer, namely the internal
  318. Blosc format version (`version`) and the format for the internal
  319. Lempel-Ziv compressor used (`versionlz`).
  320. This function should always succeed.
  321. */
  322. BLOSC_EXPORT void blosc_cbuffer_versions(const void *cbuffer, int *version,
  323. int *versionlz);
  324. /**
  325. Return the compressor library/format used in a compressed buffer.
  326. This function should always succeed.
  327. */
  328. BLOSC_EXPORT char *blosc_cbuffer_complib(const void *cbuffer);
  329. /*********************************************************************
  330. Low-level functions follow. Use them only if you are an expert!
  331. *********************************************************************/
  332. /** Get the internal blocksize to be used during compression. 0 means
  333. that an automatic blocksize is computed internally. */
  334. BLOSC_EXPORT int blosc_get_blocksize(void);
  335. /**
  336. Force the use of a specific blocksize. If 0, an automatic
  337. blocksize will be used (the default).
  338. The blocksize is a critical parameter with important restrictions on
  339. the allowed values, so use this with care.
  340. */
  341. BLOSC_EXPORT void blosc_set_blocksize(size_t blocksize);
  342. #ifdef __cplusplus
  343. }
  344. #endif
  345. #endif