123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120 |
from __future__ import print_function

import cProfile
import functools
import inspect
import itertools
import time

import bcolz
import numexpr as ne
import numpy as np
# Report which numexpr build is in use for this run.
print("numexpr version:", ne.__version__)

# Compression defaults applied to the bcolz containers created below.
bcolz.defaults.cparams['shuffle'] = bcolz.SHUFFLE
# Alternative shuffle filter:
#bcolz.defaults.cparams['shuffle'] = bcolz.BITSHUFFLE
bcolz.defaults.cparams['cname'] = 'blosclz'
# Alternative codec:
#bcolz.defaults.cparams['cname'] = 'lz4'
bcolz.defaults.cparams['clevel'] = 5

# Default virtual machine used by bcolz to evaluate query expressions.
# Alternatives:
#bcolz.defaults.vm = "dask"
#bcolz.defaults.vm = "python"
bcolz.defaults.vm = "numexpr"

# Number of rows in the test data and the upper bound used in every query.
N = 1e8
LMAX = 1e3

# Two identical ramps, kept both as plain numpy arrays and as the columns
# "a" and "b" of a bcolz ctable so the same query can be run on each.
npa, npb = np.arange(N), np.arange(N)
ct = bcolz.ctable([npa, npb], names=["a", "b"])
def do_cprofile(func):
    """Decorator that runs every call to *func* under cProfile.

    The wrapped call's return value is passed through unchanged. The
    profiler is stopped and its statistics, sorted by cumulative time,
    are printed after every call, whether *func* returns or raises.
    """
    # Preserve func's name and docstring so that outer decorators such as
    # timefunc report the real function name instead of 'profiled_func'.
    @functools.wraps(func)
    def profiled_func(*args, **kwargs):
        profile = cProfile.Profile()
        profile.enable()
        try:
            return func(*args, **kwargs)
        finally:
            # Stop collecting before reporting, on the error path as well.
            profile.disable()
            profile.print_stats(sort='cumulative')
    return profiled_func
def timefunc(f):
    """Decorator that prints how long each call to *f* takes.

    After the wrapped call returns, a line of the form
    ``<name> took <seconds> sec`` is printed, with the elapsed wall-clock
    time rounded to three decimals. The return value of *f* is passed
    through unchanged. Nothing is printed if *f* raises.
    """
    # Keep f's name and docstring on the wrapper so the decorated function
    # does not show up as 'f_timer' to other decorators or tools.
    @functools.wraps(f)
    def f_timer(*args, **kwargs):
        start = time.time()
        result = f(*args, **kwargs)
        elapsed = time.time() - start
        print(f.__name__, 'took', round(elapsed, 3), 'sec')
        return result
    return f_timer
@timefunc
def where_numpy():
    """Sum the entries of ``npa`` selected by a pure-numpy boolean mask.

    The mask is ``(npa > 5) & (npb < LMAX)``. The matching indices are
    walked one by one in Python and the corresponding ``npa`` values are
    added up with the builtin ``sum``.
    """
    mask = (npa > 5) & (npb < LMAX)
    hits = np.where(mask)[0]
    return sum(npa[idx] for idx in hits)
@timefunc
def where_numexpr():
    """Sum the entries of ``npa`` selected by a numexpr-evaluated mask.

    Same selection as ``(npa > 5) & (npb < LMAX)``, but the condition is
    handed to ``ne.evaluate`` as a string. The matching indices are then
    walked in Python and the ``npa`` values summed with the builtin ``sum``.
    """
    # ne.evaluate is called directly from this function's frame, exactly
    # as before, so the names in the expression are looked up the same way.
    mask = ne.evaluate('(npa > 5) & (npb < LMAX)')
    hits = np.where(mask)[0]
    return sum(npa[idx] for idx in hits)
@timefunc
# Profiling toggle, disabled by default:
#@do_cprofile
def bcolz_where():
    """Sum field ``a`` over the rows ``ct.where`` yields for the query.

    The condition ``(a > 5) & (b < LMAX)`` is passed to bcolz as a string
    that names the ctable columns ``a`` and ``b``.
    """
    matching_rows = ct.where("(a > 5) & (b < LMAX)")
    return sum(row.a for row in matching_rows)
@timefunc
# Profiling toggle, disabled by default:
#@do_cprofile
def bcolz_where_numpy():
    """Sum field ``a`` over ``ct.where`` rows, with numpy arrays in the query.

    The expression string names the module-level arrays ``npa`` and
    ``npb`` instead of the ctable columns.
    """
    # NOTE(review): presumably bcolz resolves npa/npb/LMAX from the calling
    # scope; the call is kept directly in this function for that reason.
    matching_rows = ct.where("(npa > 5) & (npb < LMAX)")
    return sum(row.a for row in matching_rows)
@timefunc
# Profiling toggle, disabled by default:
#@do_cprofile
def bcolz_where_numexpr():
    """Sum field ``a`` over ``ct.where`` rows selected by a precomputed mask.

    The condition ``(npa > 5) & (npb < LMAX)`` is first evaluated with
    ``ne.evaluate`` and the result is handed to ``ct.where``.
    """
    mask = ne.evaluate("(npa > 5) & (npb < LMAX)")
    matching_rows = ct.where(mask)
    return sum(row.a for row in matching_rows)
@timefunc
# Profiling toggle, disabled by default:
#@do_cprofile
def whereblocks():
    """Sum column ``a`` over the blocks produced by ``ct.whereblocks``.

    Each block yielded for ``(a > 5) & (b < LMAX)`` contributes the sum of
    its ``'a'`` field. The running total starts as the float ``0.``.
    """
    total = 0.
    # The iteration stays in this function's frame, as in the original.
    for block in ct.whereblocks("(a > 5) & (b < LMAX)", blen=None):
        total += block['a'].sum()
    return total
@timefunc
# Profiling toggle, disabled by default:
#@do_cprofile
def fetchwhere_bcolz():
    """Fetch the matching rows with ``out_flavor='bcolz'`` and sum column ``a``."""
    selected = ct.fetchwhere("(a > 5) & (b < LMAX)", out_flavor='bcolz')
    column_a = selected['a']
    return column_a.sum()
@timefunc
# Profiling toggle, disabled by default:
#@do_cprofile
def fetchwhere_numpy():
    """Fetch the matching rows with ``out_flavor='numpy'`` and sum column ``a``."""
    selected = ct.fetchwhere("(a > 5) & (b < LMAX)", out_flavor='numpy')
    column_a = selected['a']
    return column_a.sum()
@timefunc
# Profiling toggle, disabled by default:
#@do_cprofile
def fetchwhere_dask():
    """Fetch the matching rows with ``vm="dask"`` and sum column ``a``."""
    selected = ct.fetchwhere("(a > 5) & (b < LMAX)", vm="dask")
    return selected['a'].sum()
# Show the ctable under test.
print(repr(ct))

# The pure-numpy result is the reference every other variant must match.
a0 = where_numpy()
print("a0:", a0)

# Run the remaining variants in the original order; each prints its own
# timing via timefunc and must reproduce the reference sum exactly.
for _bench in (
    where_numexpr,
    bcolz_where,
    bcolz_where_numpy,
    bcolz_where_numexpr,
    whereblocks,
    fetchwhere_bcolz,
    fetchwhere_numpy,
    fetchwhere_dask,
):
    a1 = _bench()
    assert a0 == a1
|