  /*********************************************************************
    bench.c

    Small benchmark for testing basic capabilities of Blosc.

    You can select different degrees of 'randomness' in the input buffer,
    as well as external datafiles (uncomment the lines after the "For data
    coming from a file" comment).

    For usage instructions of this benchmark, please see:

      http://blosc.org/synthetic-benchmarks.html

    I'm collecting speeds for different machines, so the output of your
    benchmarks and your processor specifications are welcome!

    Author: Francesc Alted <francesc@blosc.org>

    Note: Compiling this with VS2008 does not work well with cmake.  Here
    is a way to compile the benchmark (with added support for LZ4):

    > cl /DHAVE_LZ4 /arch:SSE2 /Ox /Febench.exe /Iblosc /Iinternal-complibs\lz4-1.7.2 bench\bench.c blosc\blosc.c blosc\blosclz.c blosc\shuffle.c blosc\shuffle-sse2.c blosc\shuffle-generic.c blosc\bitshuffle-generic.c blosc\bitshuffle-sse2.c internal-complibs\lz4-1.7.2\*.c

    See LICENSES/BLOSC.txt for details about copyright and rights to use.
  **********************************************************************/
  #include <errno.h>
  #include <limits.h>
  #include <stdlib.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <fcntl.h>

  #if defined(_WIN32)
    /* For QueryPerformanceCounter(), etc. */
    #include <windows.h>
  #elif defined(__MACH__)
    #include <mach/clock.h>
    #include <mach/mach.h>
    #include <time.h>
    #include <sys/time.h>
  #elif defined(__unix__)
    #include <unistd.h>
    #if defined(__linux__)
      #include <time.h>
    #else
      #include <sys/time.h>
    #endif
  #else
    #error Unable to detect platform.
  #endif

  #include "../blosc/blosc.h"
  /* Byte-size units used for buffer and working-set sizes. */
  #define KB  1024
  #define MB  (1024*KB)
  #define GB  (1024*MB)

  #define NCHUNKS (32*1024)       /* maximum number of chunks */
  #define MAX_THREADS 16

  /* Benchmark-wide state shared between main() and do_bench(). */
  int nchunks = NCHUNKS;          /* number of chunks used by the current run */
  int niter = 3;                  /* default number of iterations */
  double totalsize = 0.;          /* total compressed/decompressed size */
  /* System-specific high-precision timing functions.
   *
   * Each platform branch provides the same three names:
   *   blosc_timestamp_t    - the opaque timestamp type,
   *   blosc_set_timestamp  - store "now" into a timestamp,
   *   blosc_elapsed_usecs  - difference between two timestamps, in
   *                          microseconds (end_time - start_time).
   */
  #if defined(_WIN32)

  /* The type of timestamp used on this system. */
  #define blosc_timestamp_t LARGE_INTEGER

  /* Set a timestamp value to the current time. */
  void blosc_set_timestamp(blosc_timestamp_t* timestamp) {
    /* Ignore the return value, assume the call always succeeds. */
    QueryPerformanceCounter(timestamp);
  }

  /* Given two timestamp values, return the difference in microseconds. */
  double blosc_elapsed_usecs(blosc_timestamp_t start_time, blosc_timestamp_t end_time) {
    LARGE_INTEGER CounterFreq;
    QueryPerformanceFrequency(&CounterFreq);

    /* Counter ticks divided by ticks-per-microsecond. */
    return (double)(end_time.QuadPart - start_time.QuadPart) / ((double)CounterFreq.QuadPart / 1e6);
  }

  #else

  /* The type of timestamp used on this system. */
  #define blosc_timestamp_t struct timespec

  /* Set a timestamp value to the current time. */
  void blosc_set_timestamp(blosc_timestamp_t* timestamp) {
  #if defined(__MACH__)
    /* This branch reads the Mach calendar clock service via clock_get_time()
       instead of calling clock_gettime(). */
    clock_serv_t cclock;
    mach_timespec_t mts;
    host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
    clock_get_time(cclock, &mts);
    mach_port_deallocate(mach_task_self(), cclock);
    timestamp->tv_sec = mts.tv_sec;
    timestamp->tv_nsec = mts.tv_nsec;
  #elif defined(CLOCK_MONOTONIC)
    clock_gettime(CLOCK_MONOTONIC, timestamp);
  #else
    /* <time.h> did not expose CLOCK_MONOTONIC (e.g. a strict ISO C build
       without POSIX feature macros): fall back to the C11 clock. */
    timespec_get(timestamp, TIME_UTC);
  #endif
  }

  /* Given two timestamp values, return the difference in microseconds. */
  double blosc_elapsed_usecs(blosc_timestamp_t start_time, blosc_timestamp_t end_time) {
    /* Whole seconds scaled up to usecs, plus nanoseconds scaled down. */
    return (1e6 * (end_time.tv_sec - start_time.tv_sec))
           + (1e-3 * (end_time.tv_nsec - start_time.tv_nsec));
  }

  #endif
  87. /* Given two timeval stamps, return the difference in seconds */
  88. double getseconds(blosc_timestamp_t last, blosc_timestamp_t current) {
  89. return 1e-6 * blosc_elapsed_usecs(last, current);
  90. }
  91. /* Given two timeval stamps, return the time per chunk in usec */
  92. double get_usec_chunk(blosc_timestamp_t last, blosc_timestamp_t current, int niter, size_t nchunks) {
  93. double elapsed_usecs = (double)blosc_elapsed_usecs(last, current);
  94. return elapsed_usecs / (double)(niter * nchunks);
  95. }
  /* Define posix_memalign for Windows */
  #if defined(_WIN32)
  #include <errno.h>
  #include <malloc.h>

  /* Minimal posix_memalign() replacement built on _aligned_malloc().
   *
   * Returns 0 and stores the new buffer in *memptr on success.  When the
   * allocation fails, ENOMEM is returned and *memptr is left untouched, so
   * callers that check the return code see the failure (the previous shim
   * always reported success).
   */
  int posix_memalign(void **memptr, size_t alignment, size_t size)
  {
    void *block = _aligned_malloc(size, alignment);

    if (block == NULL) {
      return ENOMEM;
    }
    *memptr = block;
    return 0;
  }

  /* Buffers allocated with _aligned_malloc need to be freed with _aligned_free. */
  #define aligned_free(memptr) _aligned_free(memptr)
  #else
  /* If not on Windows, aligned memory can be freed in the usual way. */
  #define aligned_free(memptr) free(memptr)
  #endif /* defined(_WIN32) */
  /* Produce the synthetic value stored at index `i` of the source buffer.
   *
   * The index is mixed with several shifted copies of itself and then
   * truncated to its `rshift` low-order bits.  With rshift >= 32 the value
   * is returned unmasked; with rshift <= 0 no bits are kept and 0 is
   * returned.
   *
   * All arithmetic is done on unsigned int: left-shifting a signed int into
   * or past the sign bit, and evaluating (1 << 31) - 1, is undefined
   * behaviour in C.
   */
  int get_value(int i, int rshift) {
    const unsigned int u = (unsigned int)i;
    unsigned int v = (u << 26) ^ (u << 18) ^ (u << 11) ^ (u << 3) ^ u;

    if (rshift < 32) {
      const unsigned int mask = (rshift > 0) ? ((1u << rshift) - 1u) : 0u;
      v &= mask;
    }
    return (int)v;
  }
  /* Fill `src` with the synthetic benchmark data.
   *
   *   src    - destination buffer, written as an array of int
   *   size   - size of the buffer in bytes; only the first
   *            size / sizeof(int) ints are written, trailing bytes are
   *            left untouched
   *   rshift - number of significant bits per value (see get_value())
   *
   * A NULL buffer or a non-positive size writes nothing.  Previously a
   * negative size was converted to an enormous unsigned loop bound.
   */
  void init_buffer(void *src, int size, int rshift) {
    int *words = (int *)src;
    size_t nwords;
    size_t k;

    /* To have reproducible results */
    srand(1);

    if (src == NULL || size <= 0) {
      return;
    }
    nwords = (size_t)size / sizeof(int);

    /* Initialize the original buffer.  Other patterns that have been used
       here: all zeros, 0x01010101, 0x01020304, i * 1/.3, i, or
       rand() >> (32-rshift). */
    for (k = 0; k < nwords; ++k) {
      words[k] = get_value((int)k, rshift);
    }
  }
  135. void do_bench(char *compressor, char *shuffle, int nthreads, int size, int elsize,
  136. int rshift, FILE * ofile) {
  137. void *src, *srccpy;
  138. void *dest[NCHUNKS], *dest2;
  139. int nbytes = 0, cbytes = 0;
  140. int i, j, retcode;
  141. unsigned char *orig, *round;
  142. blosc_timestamp_t last, current;
  143. double tmemcpy, tshuf, tunshuf;
  144. int clevel, doshuffle;
  145. if (strcmp(shuffle, "shuffle") == 0) {
  146. doshuffle = BLOSC_SHUFFLE;
  147. }
  148. else if (strcmp(shuffle, "bitshuffle") == 0) {
  149. doshuffle = BLOSC_BITSHUFFLE;
  150. }
  151. else if (strcmp(shuffle, "noshuffle") == 0) {
  152. doshuffle = BLOSC_NOSHUFFLE;
  153. }
  154. blosc_set_nthreads(nthreads);
  155. if(blosc_set_compressor(compressor) < 0){
  156. printf("Compiled w/o support for compressor: '%s', so sorry.\n",
  157. compressor);
  158. exit(1);
  159. }
  160. /* Initialize buffers */
  161. srccpy = malloc(size);
  162. retcode = posix_memalign( (void **)(&src), 32, size);
  163. retcode = posix_memalign( (void **)(&dest2), 32, size);
  164. /* zero src to initialize byte on it, and not only multiples of 4 */
  165. memset(src, 0, size);
  166. init_buffer(src, size, rshift);
  167. memcpy(srccpy, src, size);
  168. for (j = 0; j < nchunks; j++) {
  169. retcode = posix_memalign( (void **)(&dest[j]), 32, size+BLOSC_MAX_OVERHEAD);
  170. }
  171. fprintf(ofile, "--> %d, %d, %d, %d, %s, %s\n", nthreads, size, elsize, rshift, compressor, shuffle);
  172. fprintf(ofile, "********************** Run info ******************************\n");
  173. fprintf(ofile, "Blosc version: %s (%s)\n", BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);
  174. fprintf(ofile, "Using synthetic data with %d significant bits (out of 32)\n", rshift);
  175. fprintf(ofile, "Dataset size: %d bytes\tType size: %d bytes\n", size, elsize);
  176. fprintf(ofile, "Working set: %.1f MB\t\t", (size*nchunks) / (float)MB);
  177. fprintf(ofile, "Number of threads: %d\n", nthreads);
  178. fprintf(ofile, "********************** Running benchmarks *********************\n");
  179. blosc_set_timestamp(&last);
  180. for (i = 0; i < niter; i++) {
  181. for (j = 0; j < nchunks; j++) {
  182. memcpy(dest[j], src, size);
  183. }
  184. }
  185. blosc_set_timestamp(&current);
  186. tmemcpy = get_usec_chunk(last, current, niter, nchunks);
  187. fprintf(ofile, "memcpy(write):\t\t %6.1f us, %.1f MB/s\n",
  188. tmemcpy, (size * 1e6) / (tmemcpy*MB));
  189. blosc_set_timestamp(&last);
  190. for (i = 0; i < niter; i++) {
  191. for (j = 0; j < nchunks; j++) {
  192. memcpy(dest2, dest[j], size);
  193. }
  194. }
  195. blosc_set_timestamp(&current);
  196. tmemcpy = get_usec_chunk(last, current, niter, nchunks);
  197. fprintf(ofile, "memcpy(read):\t\t %6.1f us, %.1f MB/s\n",
  198. tmemcpy, (size * 1e6) / (tmemcpy*MB));
  199. for (clevel=0; clevel<10; clevel++) {
  200. fprintf(ofile, "Compression level: %d\n", clevel);
  201. blosc_set_timestamp(&last);
  202. for (i = 0; i < niter; i++) {
  203. for (j = 0; j < nchunks; j++) {
  204. cbytes = blosc_compress(clevel, doshuffle, elsize, size, src,
  205. dest[j], size+BLOSC_MAX_OVERHEAD);
  206. }
  207. }
  208. blosc_set_timestamp(&current);
  209. tshuf = get_usec_chunk(last, current, niter, nchunks);
  210. fprintf(ofile, "comp(write):\t %6.1f us, %.1f MB/s\t ",
  211. tshuf, (size * 1e6) / (tshuf*MB));
  212. fprintf(ofile, "Final bytes: %d ", cbytes);
  213. if (cbytes > 0) {
  214. fprintf(ofile, "Ratio: %3.2f", size/(float)cbytes);
  215. }
  216. fprintf(ofile, "\n");
  217. /* Compressor was unable to compress. Copy the buffer manually. */
  218. if (cbytes == 0) {
  219. for (j = 0; j < nchunks; j++) {
  220. memcpy(dest[j], src, size);
  221. }
  222. }
  223. blosc_set_timestamp(&last);
  224. for (i = 0; i < niter; i++) {
  225. for (j = 0; j < nchunks; j++) {
  226. if (cbytes == 0) {
  227. memcpy(dest2, dest[j], size);
  228. nbytes = size;
  229. }
  230. else {
  231. nbytes = blosc_decompress(dest[j], dest2, size);
  232. }
  233. }
  234. }
  235. blosc_set_timestamp(&current);
  236. tunshuf = get_usec_chunk(last, current, niter, nchunks);
  237. fprintf(ofile, "decomp(read):\t %6.1f us, %.1f MB/s\t ",
  238. tunshuf, (nbytes * 1e6) / (tunshuf*MB));
  239. if (nbytes < 0) {
  240. fprintf(ofile, "FAILED. Error code: %d\n", nbytes);
  241. }
  242. /* fprintf(ofile, "Orig bytes: %d\tFinal bytes: %d\n", cbytes, nbytes); */
  243. /* Check if data has had a good roundtrip.
  244. Byte-by-byte comparison is slow, so use 'memcmp' to check whether the
  245. roundtripped data is correct. If not, fall back to the slow path to
  246. print diagnostic messages. */
  247. orig = (unsigned char *)srccpy;
  248. round = (unsigned char *)dest2;
  249. if (memcmp(orig, round, size) != 0)
  250. {
  251. for(i = 0; i<size; ++i){
  252. if (orig[i] != round[i]) {
  253. fprintf(ofile, "\nError: Original data and round-trip do not match in pos %d\n",
  254. (int)i);
  255. fprintf(ofile, "Orig--> %x, round-trip--> %x\n", orig[i], round[i]);
  256. break;
  257. }
  258. }
  259. }
  260. else { i = size; }
  261. if (i == size) fprintf(ofile, "OK\n");
  262. } /* End clevel loop */
  263. /* To compute the totalsize, we should take into account the 10
  264. compression levels */
  265. totalsize += (size * nchunks * niter * 10.);
  266. aligned_free(src); free(srccpy); aligned_free(dest2);
  267. for (i = 0; i < nchunks; i++) {
  268. aligned_free(dest[i]);
  269. }
  270. }
  271. /* Compute a sensible value for nchunks */
  272. int get_nchunks(int size_, int ws) {
  273. int nchunks;
  274. nchunks = ws / size_;
  275. if (nchunks > NCHUNKS) nchunks = NCHUNKS;
  276. if (nchunks < 1) nchunks = 1;
  277. return nchunks;
  278. }
  279. void print_compress_info(void)
  280. {
  281. char *name = NULL, *version = NULL;
  282. int ret;
  283. printf("Blosc version: %s (%s)\n", BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);
  284. printf("List of supported compressors in this build: %s\n",
  285. blosc_list_compressors());
  286. printf("Supported compression libraries:\n");
  287. ret = blosc_get_complib_info("blosclz", &name, &version);
  288. if (ret >= 0) printf(" %s: %s\n", name, version);
  289. ret = blosc_get_complib_info("lz4", &name, &version);
  290. if (ret >= 0) printf(" %s: %s\n", name, version);
  291. ret = blosc_get_complib_info("snappy", &name, &version);
  292. if (ret >= 0) printf(" %s: %s\n", name, version);
  293. ret = blosc_get_complib_info("zlib", &name, &version);
  294. if (ret >= 0) printf(" %s: %s\n", name, version);
  295. }
  296. int main(int argc, char *argv[]) {
  297. char compressor[32];
  298. char shuffle[32] = "shuffle";
  299. char bsuite[32];
  300. int single = 1;
  301. int suite = 0;
  302. int hard_suite = 0;
  303. int extreme_suite = 0;
  304. int debug_suite = 0;
  305. int nthreads = 4; /* The number of threads */
  306. int size = 2*MB; /* Buffer size */
  307. int elsize = 8; /* Datatype size */
  308. int rshift = 19; /* Significant bits */
  309. int workingset = 256*MB; /* The maximum allocated memory */
  310. int nthreads_, size_, elsize_, rshift_, i;
  311. FILE * output_file = stdout;
  312. blosc_timestamp_t last, current;
  313. float totaltime;
  314. char usage[256];
  315. print_compress_info();
  316. strncpy(usage, "Usage: bench [blosclz | lz4 | lz4hc | snappy | zlib] "
  317. "[noshuffle | shuffle | bitshuffle] "
  318. "[single | suite | hardsuite | extremesuite | debugsuite] "
  319. "[nthreads] [bufsize(bytes)] [typesize] [sbits]", 255);
  320. if (argc < 2) {
  321. printf("%s\n", usage);
  322. exit(1);
  323. }
  324. strcpy(compressor, argv[1]);
  325. if (strcmp(compressor, "blosclz") != 0 &&
  326. strcmp(compressor, "lz4") != 0 &&
  327. strcmp(compressor, "lz4hc") != 0 &&
  328. strcmp(compressor, "snappy") != 0 &&
  329. strcmp(compressor, "zlib") != 0) {
  330. printf("No such compressor: '%s'\n", compressor);
  331. printf("%s\n", usage);
  332. exit(2);
  333. }
  334. if (argc >= 3) {
  335. strcpy(shuffle, argv[2]);
  336. if (strcmp(shuffle, "shuffle") != 0 &&
  337. strcmp(shuffle, "bitshuffle") != 0 &&
  338. strcmp(shuffle, "noshuffle") != 0) {
  339. printf("No such shuffler: '%s'\n", shuffle);
  340. printf("%s\n", usage);
  341. exit(2);
  342. }
  343. }
  344. if (argc < 4)
  345. strcpy(bsuite, "single");
  346. else
  347. strcpy(bsuite, argv[3]);
  348. if (strcmp(bsuite, "single") == 0) {
  349. single = 1;
  350. }
  351. else if (strcmp(bsuite, "test") == 0) {
  352. single = 1;
  353. workingset = 128*MB;
  354. }
  355. else if (strcmp(bsuite, "suite") == 0) {
  356. suite = 1;
  357. }
  358. else if (strcmp(bsuite, "hardsuite") == 0) {
  359. hard_suite = 1;
  360. workingset = 64*MB;
  361. /* Values here are ending points for loops */
  362. nthreads = 2;
  363. size = 8*MB;
  364. elsize = 32;
  365. rshift = 32;
  366. }
  367. else if (strcmp(bsuite, "extremesuite") == 0) {
  368. extreme_suite = 1;
  369. workingset = 32*MB;
  370. niter = 1;
  371. /* Values here are ending points for loops */
  372. nthreads = 4;
  373. size = 16*MB;
  374. elsize = 32;
  375. rshift = 32;
  376. }
  377. else if (strcmp(bsuite, "debugsuite") == 0) {
  378. debug_suite = 1;
  379. workingset = 32*MB;
  380. niter = 1;
  381. /* Warning: values here are starting points for loops. This is
  382. useful for debugging. */
  383. nthreads = 1;
  384. size = 16*KB;
  385. elsize = 1;
  386. rshift = 0;
  387. }
  388. else {
  389. printf("%s\n", usage);
  390. exit(1);
  391. }
  392. printf("Using compressor: %s\n", compressor);
  393. printf("Using shuffle type: %s\n", shuffle);
  394. printf("Running suite: %s\n", bsuite);
  395. if (argc >= 5) {
  396. nthreads = atoi(argv[4]);
  397. }
  398. if (argc >= 6) {
  399. size = atoi(argv[5]);
  400. }
  401. if (argc >= 7) {
  402. elsize = atoi(argv[6]);
  403. }
  404. if (argc >= 8) {
  405. rshift = atoi(argv[7]);
  406. }
  407. if ((argc >= 9) || !(single || suite || hard_suite || extreme_suite)) {
  408. printf("%s\n", usage);
  409. exit(1);
  410. }
  411. nchunks = get_nchunks(size, workingset);
  412. blosc_set_timestamp(&last);
  413. blosc_init();
  414. if (suite) {
  415. for (nthreads_=1; nthreads_ <= nthreads; nthreads_++) {
  416. do_bench(compressor, shuffle, nthreads_, size, elsize, rshift, output_file);
  417. }
  418. }
  419. else if (hard_suite) {
  420. /* Let's start the rshift loop by 4 so that 19 is visited. This
  421. is to allow a direct comparison with the plain suite, that runs
  422. precisely at 19 significant bits. */
  423. for (rshift_ = 4; rshift_ <= rshift; rshift_ += 5) {
  424. for (elsize_ = 1; elsize_ <= elsize; elsize_ *= 2) {
  425. /* The next loop is for getting sizes that are not power of 2 */
  426. for (i = -elsize_; i <= elsize_; i += elsize_) {
  427. for (size_ = 32*KB; size_ <= size; size_ *= 2) {
  428. nchunks = get_nchunks(size_+i, workingset);
  429. niter = 1;
  430. for (nthreads_ = 1; nthreads_ <= nthreads; nthreads_++) {
  431. do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
  432. blosc_set_timestamp(&current);
  433. totaltime = (float)getseconds(last, current);
  434. printf("Elapsed time:\t %6.1f s. Processed data: %.1f GB\n",
  435. totaltime, totalsize / GB);
  436. }
  437. }
  438. }
  439. }
  440. }
  441. }
  442. else if (extreme_suite) {
  443. for (rshift_ = 0; rshift_ <= rshift; rshift_++) {
  444. for (elsize_ = 1; elsize_ <= elsize; elsize_++) {
  445. /* The next loop is for getting sizes that are not power of 2 */
  446. for (i = -elsize_*2; i <= elsize_*2; i += elsize_) {
  447. for (size_ = 32*KB; size_ <= size; size_ *= 2) {
  448. nchunks = get_nchunks(size_+i, workingset);
  449. for (nthreads_ = 1; nthreads_ <= nthreads; nthreads_++) {
  450. do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
  451. blosc_set_timestamp(&current);
  452. totaltime = (float)getseconds(last, current);
  453. printf("Elapsed time:\t %6.1f s. Processed data: %.1f GB\n",
  454. totaltime, totalsize / GB);
  455. }
  456. }
  457. }
  458. }
  459. }
  460. }
  461. else if (debug_suite) {
  462. for (rshift_ = rshift; rshift_ <= 32; rshift_++) {
  463. for (elsize_ = elsize; elsize_ <= 32; elsize_++) {
  464. /* The next loop is for getting sizes that are not power of 2 */
  465. for (i = -elsize_*2; i <= elsize_*2; i += elsize_) {
  466. for (size_ = size; size_ <= 16*MB; size_ *= 2) {
  467. nchunks = get_nchunks(size_+i, workingset);
  468. for (nthreads_ = nthreads; nthreads_ <= 6; nthreads_++) {
  469. do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
  470. blosc_set_timestamp(&current);
  471. totaltime = (float)getseconds(last, current);
  472. printf("Elapsed time:\t %6.1f s. Processed data: %.1f GB\n",
  473. totaltime, totalsize / GB);
  474. }
  475. }
  476. }
  477. }
  478. }
  479. }
  480. /* Single mode */
  481. else {
  482. do_bench(compressor, shuffle, nthreads, size, elsize, rshift, output_file);
  483. }
  484. /* Print out some statistics */
  485. blosc_set_timestamp(&current);
  486. totaltime = (float)getseconds(last, current);
  487. printf("\nRound-trip compr/decompr on %.1f GB\n", totalsize / GB);
  488. printf("Elapsed time:\t %6.1f s, %.1f MB/s\n",
  489. totaltime, totalsize*2*1.1/(MB*totaltime));
  490. /* Free blosc resources */
  491. blosc_free_resources();
  492. blosc_destroy();
  493. return 0;
  494. }