concat.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
# Benchmark that compares the times for concatenating arrays with
# compressed arrays vs plain numpy arrays. The 'numpy' and 'concat'
# styles are for regular numpy arrays, while 'bcolz' is for carrays.
#
# Call this benchmark as:
#
# python bench/concat.py style
#
# where `style` can be any of 'numpy', 'concat' or 'bcolz'
#
# You can modify other parameters from the command line if you want:
#
# python bench/concat.py style arraysize nchunks nrepeats clevel
#
  15. from __future__ import absolute_import
  16. import sys
  17. import math
  18. import time
  19. import numpy
  20. import bcolz
  21. from bcolz.py2help import xrange
  22. from .bench_helper import ctime
  23. def concat(data):
  24. tlen = sum(x.shape[0] for x in data)
  25. alldata = numpy.empty((tlen,))
  26. pos = 0
  27. for x in data:
  28. step = x.shape[0]
  29. alldata[pos:pos + step] = x
  30. pos += step
  31. return alldata
  32. def append(data, clevel):
  33. alldata = bcolz.carray(data[0], cparams=bcolz.cparams(clevel))
  34. for carr in data[1:]:
  35. alldata.append(carr)
  36. return alldata
  37. class Suite:
  38. a = None
  39. N = 1000000
  40. K = 10
  41. T = 3
  42. clevel = 1
  43. style = 'bcolz'
  44. r = None
  45. def __init__(self, N=1000000, K=10, T=3, clevel=1, style='bcolz'):
  46. Suite.N = N
  47. Suite.K = K
  48. Suite.T = T
  49. Suite.clevel = clevel
  50. Suite.style = style
  51. Suite.r = None
  52. def setup(self):
  53. # The next datasets allow for very high compression ratios
  54. Suite.a = [numpy.arange(Suite.N, dtype='f8') for _ in range(Suite.K)]
  55. print("problem size: (%d) x %d = 10^%g" % (Suite.N, Suite.K,
  56. math.log10(Suite.N * Suite.K)))
  57. def time_concatenate(self):
  58. if Suite.style == 'numpy':
  59. for _ in xrange(Suite.T):
  60. Suite.r = numpy.concatenate(Suite.a, 0)
  61. elif Suite.style == 'concat':
  62. for _ in xrange(Suite.T):
  63. Suite.r = concat(Suite.a)
  64. elif Suite.style == 'bcolz':
  65. for _ in xrange(Suite.T):
  66. Suite.r = append(Suite.a, Suite.clevel)
  67. def print_container_size(self):
  68. if Suite.style == 'bcolz':
  69. size = Suite.r.cbytes
  70. else:
  71. size = Suite.r.size * Suite.r.dtype.itemsize
  72. print("size of the final container: %.3f MB" %
  73. (size / float(1024 * 1024)))
  74. if __name__ == '__main__':
  75. if len(sys.argv) < 2:
  76. print(
  77. "Pass at least one of these styles: 'numpy', 'concat' or 'bcolz' ")
  78. sys.exit(1)
  79. style = sys.argv[1]
  80. if len(sys.argv) == 2:
  81. N, K, T, clevel = (1000000, 10, 3, 1)
  82. else:
  83. N, K, T = [int(arg) for arg in sys.argv[2:5]]
  84. if len(sys.argv) > 5:
  85. clevel = int(sys.argv[5])
  86. else:
  87. clevel = 0
  88. # run benchmark
  89. suite = Suite(N=N, K=K, T=T, clevel=clevel, style=style)
  90. suite.setup()
  91. with ctime("time_concatenate"):
  92. suite.time_concatenate()
  93. suite.print_container_size()