123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- # Benchmark that compares the time needed to concatenate arrays using
- # compressed carrays vs plain numpy arrays. The 'numpy' and 'concat'
- # styles are for regular numpy arrays, while 'bcolz' is for carrays.
- #
- # Call this benchmark as:
- #
- # python bench/concat.py style
- #
- # where `style` can be any of 'numpy', 'concat' or 'bcolz'
- #
- # You can modify other parameters from the command line if you want:
- #
- # python bench/concat.py style arraysize nchunks nrepeats clevel
- #
- from __future__ import absolute_import
- import sys
- import math
- import time
- import numpy
- import bcolz
- from bcolz.py2help import xrange
def concat(data):
    """Copy every array in `data`, in order, into one new numpy array.

    The output is preallocated with ``numpy.empty`` using its default
    dtype, and its length is the sum of ``shape[0]`` over all the inputs.
    `data` is traversed twice: once to compute the total length and once
    to copy the contents.
    """
    total = 0
    for chunk in data:
        total += chunk.shape[0]

    out = numpy.empty((total,))

    start = 0
    for chunk in data:
        stop = start + chunk.shape[0]
        out[start:stop] = chunk
        start = stop
    return out
def append(data, clevel):
    """Build a bcolz carray from `data` by successive appends.

    The carray is created from ``data[0]`` with compression parameters
    ``bcolz.cparams(clevel)``; every remaining item of `data` is then
    appended to it in order. Returns the resulting carray.
    """
    head = data[0]
    compression = bcolz.cparams(clevel)
    container = bcolz.carray(head, cparams=compression)
    for chunk in data[1:]:
        container.append(chunk)
    return container
# ---------------------------------------------------------------------------
# Command-line driver: parse the arguments, build the input chunks, time the
# selected concatenation style and report the size of the result.
# ---------------------------------------------------------------------------
if len(sys.argv) < 2:
    print("Pass at least one of these styles: 'numpy', 'concat' or 'bcolz' ")
    sys.exit(1)

style = sys.argv[1]
# Reject an unknown style up front. Otherwise none of the timing branches
# below would run and the final report would fail with a NameError on `r`.
if style not in ('numpy', 'concat', 'bcolz'):
    print("Unknown style %r. Pass one of these styles: "
          "'numpy', 'concat' or 'bcolz'" % (style,))
    sys.exit(1)

if len(sys.argv) == 2:
    N, K, T, clevel = (1000000, 10, 3, 1)
else:
    # arraysize, nchunks and nrepeats are optional positional overrides:
    # any of them that is not given keeps its default value.
    sizes = [1000000, 10, 3]
    given = [int(arg) for arg in sys.argv[2:5]]
    sizes[:len(given)] = given
    N, K, T = sizes
    # NOTE: once any numeric argument is passed, an omitted clevel is 0,
    # whereas the all-defaults invocation above uses clevel 1.
    if len(sys.argv) > 5:
        clevel = int(sys.argv[5])
    else:
        clevel = 0

# log10(N * K), concatenation of K chunks and the division by T below all
# need strictly positive values; fail early with a readable message.
if min(N, K, T) < 1:
    print("arraysize, nchunks and nrepeats must all be positive integers")
    sys.exit(1)

# The next datasets allow for very high compression ratios
a = [numpy.arange(N, dtype='f8') for _ in range(K)]
print("problem size: (%d) x %d = 10^%g" % (N, K, math.log10(N * K)))

# Time T repetitions of the selected style; `r` keeps the last result so
# that its size can be reported afterwards.
t = time.time()
if style == 'numpy':
    for _ in xrange(T):
        r = numpy.concatenate(a, 0)
elif style == 'concat':
    for _ in xrange(T):
        r = concat(a)
else:  # style == 'bcolz' (validated above)
    for _ in xrange(T):
        r = append(a, clevel)
t = time.time() - t
print('time for concat: %.3fs' % (t / T))

if style == 'bcolz':
    # Presumably the compressed size in bytes -- confirm against bcolz docs.
    size = r.cbytes
else:
    size = r.size * r.dtype.itemsize
print("size of the final container: %.3f MB" % (size / float(1024 * 1024)))
|