concat.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. # Benchmark that compares the times for concatenating arrays with
  2. # compressed arrays vs plain numpy arrays. The 'numpy' and 'concat'
  3. # styles are for regular numpy arrays, while 'bcolz' is for carrays.
  4. #
  5. # Call this benchmark as:
  6. #
  7. # python bench/concat.py style
  8. #
  9. # where `style` can be any of 'numpy', 'concat' or 'bcolz'
  10. #
  11. # You can modify other parameters from the command line if you want:
  12. #
  13. # python bench/concat.py style arraysize nchunks nrepeats clevel
  14. #
  15. from __future__ import absolute_import
  16. import sys
  17. import math
  18. import time
  19. import numpy
  20. import bcolz
  21. from bcolz.py2help import xrange
  22. def concat(data):
  23. tlen = sum(x.shape[0] for x in data)
  24. alldata = numpy.empty((tlen,))
  25. pos = 0
  26. for x in data:
  27. step = x.shape[0]
  28. alldata[pos:pos + step] = x
  29. pos += step
  30. return alldata
  31. def append(data, clevel):
  32. alldata = bcolz.carray(data[0], cparams=bcolz.cparams(clevel))
  33. for carr in data[1:]:
  34. alldata.append(carr)
  35. return alldata
  36. if len(sys.argv) < 2:
  37. print("Pass at least one of these styles: 'numpy', 'concat' or 'bcolz' ")
  38. sys.exit(1)
  39. style = sys.argv[1]
  40. if len(sys.argv) == 2:
  41. N, K, T, clevel = (1000000, 10, 3, 1)
  42. else:
  43. N, K, T = [int(arg) for arg in sys.argv[2:5]]
  44. if len(sys.argv) > 5:
  45. clevel = int(sys.argv[5])
  46. else:
  47. clevel = 0
  48. # The next datasets allow for very high compression ratios
  49. a = [numpy.arange(N, dtype='f8') for _ in range(K)]
  50. print("problem size: (%d) x %d = 10^%g" % (N, K, math.log10(N * K)))
  51. t = time.time()
  52. if style == 'numpy':
  53. for _ in xrange(T):
  54. r = numpy.concatenate(a, 0)
  55. elif style == 'concat':
  56. for _ in xrange(T):
  57. r = concat(a)
  58. elif style == 'bcolz':
  59. for _ in xrange(T):
  60. r = append(a, clevel)
  61. t = time.time() - t
  62. print('time for concat: %.3fs' % (t / T))
  63. if style == 'bcolz':
  64. size = r.cbytes
  65. else:
  66. size = r.size * r.dtype.itemsize
  67. print("size of the final container: %.3f MB" % (size / float(1024 * 1024)) )