123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113 |
- # Benchmark that compares the time needed to concatenate arrays using
- # compressed bcolz carrays vs plain numpy arrays. The 'numpy' and 'concat'
- # styles work on regular numpy arrays, while 'bcolz' works on carrays.
- #
- # Call this benchmark as:
- #
- # python bench/concat.py style
- #
- # where `style` can be any of 'numpy', 'concat' or 'bcolz'
- #
- # You can modify other parameters from the command line if you want:
- #
- # python bench/concat.py style arraysize nchunks nrepeats clevel
- #
- from __future__ import absolute_import
- import sys
- import math
- import time
- import numpy
- import bcolz
- from bcolz.py2help import xrange
- from .bench_helper import ctime
def concat(data):
    """Concatenate arrays along their first axis by copying into one buffer.

    A hand-rolled counterpart to ``numpy.concatenate``: the output is
    allocated once and each input is copied into its own slice.

    Parameters
    ----------
    data : sequence of numpy arrays
        Arrays to join; it is traversed twice (once to size the output,
        once to copy).

    Returns
    -------
    numpy.ndarray
        One-dimensional array holding the inputs back to back.  It is
        allocated with ``numpy.empty``'s default dtype (float64), whatever
        the dtype of the inputs.
    """
    lengths = [chunk.shape[0] for chunk in data]
    out = numpy.empty((sum(lengths),))
    start = 0
    for chunk, count in zip(data, lengths):
        stop = start + count
        out[start:stop] = chunk
        start = stop
    return out
def append(data, clevel):
    """Build a bcolz carray from ``data[0]`` and append the remaining items.

    Parameters
    ----------
    data : sequence of arrays
        Must contain at least one item; ``data[0]`` seeds the carray and
        every later item is appended in order.
    clevel : int
        Value passed to ``bcolz.cparams`` to configure the carray.

    Returns
    -------
    bcolz.carray
        The carray after all appends.
    """
    seed = data[0]
    params = bcolz.cparams(clevel)
    container = bcolz.carray(seed, cparams=params)
    for chunk in data[1:]:
        container.append(chunk)
    return container
class Suite(object):
    """Benchmark suite that times the concatenation of ``K`` arrays of ``N`` items.

    Configuration and results are stored on the instance, so several suites
    can coexist without overwriting each other.  The class-level attributes
    below only provide defaults.

    Parameters
    ----------
    N : int
        Number of elements in each input array.
    K : int
        Number of input arrays.
    T : int
        Number of times the concatenation is repeated.
    clevel : int
        Value handed to ``append()`` when ``style`` is ``'bcolz'``.
    style : str
        One of ``'numpy'``, ``'concat'`` or ``'bcolz'``.
    """

    # Class-level defaults; __init__ shadows them with per-instance values.
    a = None  # list of input arrays, created by setup()
    N = 1000000
    K = 10
    T = 3
    clevel = 1
    style = 'bcolz'
    r = None  # result of the most recent concatenation

    def __init__(self, N=1000000, K=10, T=3, clevel=1, style='bcolz'):
        self.N = N
        self.K = K
        self.T = T
        self.clevel = clevel
        self.style = style
        self.a = None
        self.r = None

    def setup(self):
        """Create the ``K`` input arrays and print the problem size."""
        # The next datasets allow for very high compression ratios
        self.a = [numpy.arange(self.N, dtype='f8') for _ in range(self.K)]
        print("problem size: (%d) x %d = 10^%g" % (self.N, self.K,
                                                   math.log10(self.N * self.K)))

    def time_concatenate(self):
        """Run the concatenation ``T`` times using the configured style.

        The result of the last run is kept in ``self.r``.

        Raises
        ------
        ValueError
            If ``style`` is not one of the supported names.
        """
        if self.style == 'numpy':
            for _ in range(self.T):
                self.r = numpy.concatenate(self.a, 0)
        elif self.style == 'concat':
            for _ in range(self.T):
                self.r = concat(self.a)
        elif self.style == 'bcolz':
            for _ in range(self.T):
                self.r = append(self.a, self.clevel)
        else:
            # Fail here instead of leaving ``r`` as None and crashing later
            # with an unrelated AttributeError.
            raise ValueError(
                "unknown style %r: expected 'numpy', 'concat' or 'bcolz'"
                % (self.style,))

    def print_container_size(self):
        """Print the size in MB of the result held in ``self.r``.

        For the ``'bcolz'`` style the container's ``cbytes`` attribute is
        reported; otherwise the size is ``size * itemsize`` of the array.
        ``time_concatenate()`` must have produced a result first.
        """
        if self.style == 'bcolz':
            size = self.r.cbytes
        else:
            size = self.r.size * self.r.dtype.itemsize
        print("size of the final container: %.3f MB" %
              (size / float(1024 * 1024)))
if __name__ == '__main__':
    # Command line: style [arraysize nchunks nrepeats [clevel]]
    argv = sys.argv
    nargs = len(argv)
    if nargs < 2:
        print(
            "Pass at least one of these styles: 'numpy', 'concat' or 'bcolz' ")
        sys.exit(1)

    style = argv[1]
    if nargs > 2:
        # Sizes were given explicitly; all three numbers are required here,
        # while clevel is optional and falls back to 0 (not 1) in this branch.
        N, K, T = [int(arg) for arg in argv[2:5]]
        clevel = int(argv[5]) if nargs > 5 else 0
    else:
        # Only the style was given: use the built-in defaults.
        N, K, T, clevel = (1000000, 10, 3, 1)

    # Build the inputs, run the concatenation inside the ctime block, then
    # report the size of the result.
    suite = Suite(N=N, K=K, T=T, clevel=clevel, style=style)
    suite.setup()
    with ctime("time_concatenate"):
        suite.time_concatenate()
    suite.print_container_size()
|