123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- # Benchmark to compare the times for evaluating queries.
- # Numexpr is needed in order to execute this.
- import math
- from time import time
- import numpy as np
- import bcolz
- N = int(1e8) # the number of elements in x
- clevel = 9 # the compression level
- cname = "blosclz" # the compressor name
- sexpr = "(x+1)<10" # small number of items
- # sexpr = "(x+1)<1000000" # large number
- # sexpr = "(2*x*x*x+.3*y**2+z+1)<10" # small number
- #sexpr = "(2*x*x*x+.3*y**2+z+1)<1e15" # medium number
- #sexpr = "(2*x*x*x+.3*y**2+z+1)<1e20" # large number
- print("Creating inputs...")
- cparams = bcolz.cparams(clevel=clevel, cname=cname)
- x = np.arange(N)
- cx = bcolz.carray(x, cparams=cparams)
- if 'y' not in sexpr:
- t = bcolz.ctable((cx,), names=['x'])
- else:
- y = np.arange(N)
- z = np.arange(N)
- cy = bcolz.carray(y, cparams=cparams)
- cz = bcolz.carray(z, cparams=cparams)
- t = bcolz.ctable((cx, cy, cz), names=['x', 'y', 'z'])
- nt = t[:]
- print("Querying '%s' with 10^%d points" % (sexpr, int(math.log10(N))))
- t0 = time()
- out = [r for r in x[eval(sexpr)]]
- print("Time for numpy--> %.3f" % (time() - t0,))
- t0 = time()
- out = [r for r in t[eval(sexpr)]]
- print("Time for structured array--> %.3f" % (time() - t0,))
- t0 = time()
- out = [r for r in cx[sexpr]]
- print("Time for carray --> %.3f" % (time() - t0,))
- # Uncomment the next for disabling threading
- #ne.set_num_threads(1)
- #bcolz.blosc_set_num_threads(1)
- # Seems that this works better if we dividw the number of cores by 2.
- # Maybe due to some contention between Numexpr and Blosc?
- #bcolz.set_num_threads(bcolz.ncores//2)
- t0 = time()
- #cout = t[t.eval(sexpr, cparams=cparams)]
- cout = [r for r in t.where(sexpr)]
- #cout = [r['x'] for r in t.where(sexpr)]
- #cout = [r['y'] for r in t.where(sexpr, colnames=['x', 'y'])]
- print("Time for ctable--> %.3f" % (time() - t0,))
- print("cout-->", len(cout), cout[:10])
- #assert_array_equal(out, cout, "Arrays are not equal")
|