# Benchmark that compares the times for concatenating compressed
# arrays (bcolz carrays) vs plain numpy arrays.  The 'numpy' and 'concat'
# styles are for regular numpy arrays, while 'bcolz' is for carrays.
#
# Call this benchmark as:
#
# python bench/concat.py style
#
# where `style` can be any of 'numpy', 'concat' or 'bcolz'
#
# You can modify other parameters from the command line if you want:
#
# python bench/concat.py style arraysize nchunks nrepeats clevel
#

from __future__ import absolute_import

import sys
import math
import time

import numpy

import bcolz
from bcolz.py2help import xrange


def concat(data):
    """Concatenate 1-D chunks by copying them into one preallocated array.

    `data` is a sequence of arrays; the length of each chunk is taken from
    `shape[0]`.  The result is a new 1-D array created with numpy's default
    dtype (float64), so chunk values are cast on assignment.
    """
    lengths = [chunk.shape[0] for chunk in data]
    out = numpy.empty((sum(lengths),))

    # Copy every chunk into its own contiguous slot of the output buffer.
    start = 0
    for chunk, length in zip(data, lengths):
        stop = start + length
        out[start:stop] = chunk
        start = stop

    return out


def append(data, clevel):
    """Build a bcolz carray from `data[0]` and append the remaining chunks.

    `clevel` is passed to `bcolz.cparams` to configure the compression of
    the container.  Returns the resulting carray.
    """
    head = data[0]
    params = bcolz.cparams(clevel)
    container = bcolz.carray(head, cparams=params)

    # The first chunk seeded the container; add the rest one by one.
    remaining = data[1:]
    for chunk in remaining:
        container.append(chunk)

    return container


# --- Command-line handling -------------------------------------------------
if len(sys.argv) < 2:
    print("Pass at least one of these styles: 'numpy', 'concat' or 'bcolz' ")
    sys.exit(1)

style = sys.argv[1]
if style not in ('numpy', 'concat', 'bcolz'):
    # Reject unknown styles up front: otherwise none of the timed branches
    # below would run and `r` would be undefined when reporting the size.
    print("Unknown style %r. "
          "Pass one of these styles: 'numpy', 'concat' or 'bcolz'" % (style,))
    sys.exit(1)

if len(sys.argv) == 2:
    N, K, T, clevel = (1000000, 10, 3, 1)
else:
    # arraysize, nchunks and nrepeats are positional; any of them left out
    # keeps its default value instead of breaking the unpacking.
    given = [int(arg) for arg in sys.argv[2:5]]
    N, K, T = given + [1000000, 10, 3][len(given):]
    # NOTE: clevel defaults to 0 here but to 1 when no extra arguments are
    # passed at all; kept as-is so existing invocations behave the same.
    if len(sys.argv) > 5:
        clevel = int(sys.argv[5])
    else:
        clevel = 0

if min(N, K, T) < 1:
    # Non-positive values would fail later (log10 of the problem size,
    # division by the number of repeats, empty chunk list).
    print("arraysize, nchunks and nrepeats must all be >= 1")
    sys.exit(1)

# --- Benchmark ---------------------------------------------------------------
# The next datasets allow for very high compression ratios
a = [numpy.arange(N, dtype='f8') for _ in range(K)]
print("problem size: (%d) x %d = 10^%g" % (N, K, math.log10(N * K)))

# Time T repetitions of the selected concatenation style; the result of the
# last repetition is kept in `r` for the size report below.
t = time.time()
if style == 'numpy':
    for _ in xrange(T):
        r = numpy.concatenate(a, 0)
elif style == 'concat':
    for _ in xrange(T):
        r = concat(a)
elif style == 'bcolz':
    for _ in xrange(T):
        r = append(a, clevel)

t = time.time() - t
print('time for concat: %.3fs' % (t / T))

# carrays report their compressed size via `cbytes`; for numpy arrays the
# size is computed from the element count and item size.
if style == 'bcolz':
    size = r.cbytes
else:
    size = r.size * r.dtype.itemsize
print("size of the final container: %.3f MB" % (size / float(1024 * 1024)))