# Benchmark to evaluate the best ways to write a bcolz ctable to a
# PyTables Table.
#
# Two strategies are timed:
#   1. a blocked write, appending each block yielded by bcolz.iterblocks()
#   2. the generic ctable.tohdf5() implementation
# A third, row-by-row variant is kept below as commented-out reference code.

import os
from time import time

import bcolz
import numpy as np
import tables as tb

filepath = 'tohdf5.h5'
nodepath = '/ctable'
NR = int(1e6)   # number of rows
NC = 10         # number of columns
DTYPE = 'i4'    # dtype shared by every column

# Uncompressed payload size, in units of 2**30 bytes; derived from DTYPE so
# the reported throughput stays correct if the column type is changed.
dsize = (NR * NC * np.dtype(DTYPE).itemsize) / 2. ** 30

bcolz.cparams.setdefaults(clevel=5)

# All NC columns of the ctable are built from the same carray.
a = bcolz.arange(NR, dtype=DTYPE)
ct = bcolz.ctable((a,)*NC)

# Row-by-row using an iterator
# t0 = time()
# with tb.open_file(filepath, 'w') as f:
#     t = f.create_table(f.root, nodepath[1:], ct.dtype)
#     for row in ct:
#         t.append([row])
# tt = time() - t0
# print("time with iterator: %.2f (%.2f GB/s)" % (tt, dsize / tt))

# Using blocked write
t0 = time()
# The context manager closes the file even if a write fails; the close
# happens before the clock is read, so it is part of the measured time.
with tb.open_file(filepath, 'w') as f:
    # nodepath[1:] drops the leading '/' to get the node name under f.root.
    t = f.create_table(f.root, nodepath[1:], ct.dtype)
    for block in bcolz.iterblocks(ct):
        t.append(block)
tt = time() - t0
print("time with blocked write: %.2f (%.2f GB/s)" % (tt, dsize / tt))

# Using generic implementation
# Remove the file written above before starting the clock for this run.
os.remove(filepath)
t0 = time()
#ct.tohdf5(filepath, nodepath)
ct.tohdf5(filepath, nodepath, cname="blosc:blosclz")
tt = time() - t0
print("time with tohdf5: %.2f (%.2f GB/s)" % (tt, dsize / tt))
#print(repr(ct))