"""Benchmark to evaluate the best ways to write to a PyTables Table.

Builds a bcolz ctable of NC int32 columns with NR rows each, then times two
ways of storing it in an HDF5 file:

1. a blocked write, appending each block from ``bcolz.iterblocks`` to a
   PyTables table;
2. the generic ``ctable.tohdf5`` implementation.

Each timing is printed together with the throughput computed from the
uncompressed data size.
"""
import os
from time import time

import bcolz
import numpy as np
import tables as tb

filepath = 'tohdf5.h5'
nodepath = '/ctable'
NR = int(1e6)
NC = 10
# Uncompressed payload in GiB: NR rows x NC columns x 4 bytes per 'i4' value.
dsize = (NR * NC * 4) / 2. ** 30

bcolz.cparams.setdefaults(clevel=5)
a = bcolz.arange(NR, dtype='i4')
# The same column is repeated NC times to form the table.
ct = bcolz.ctable((a,) * NC)

# Row-by-row using an iterator (disabled)
# t0 = time()
# f = tb.open_file(filepath, 'w')
# t = f.create_table(f.root, nodepath[1:], ct.dtype)
# for row in ct:
#     t.append([row])
# f.close()
# tt = time() - t0
# print("time with iterator: %.2f (%.2f GB/s)" % (tt, dsize / tt))

# Using blocked write
t0 = time()
# The context manager closes the HDF5 file even if a write fails; the close
# still happens before the timer is stopped, so it is part of the measurement.
with tb.open_file(filepath, 'w') as f:
    # nodepath[1:] drops the leading '/' to get the node name under the root.
    t = f.create_table(f.root, nodepath[1:], ct.dtype)
    for block in bcolz.iterblocks(ct):
        t.append(block)
tt = time() - t0
print("time with blocked write: %.2f (%.2f GB/s)" % (tt, dsize / tt))

# Using generic implementation
# Remove the file written by the blocked-write run so this run starts fresh.
os.remove(filepath)
t0 = time()
#ct.tohdf5(filepath, nodepath)
ct.tohdf5(filepath, nodepath, cname="blosc:blosclz")
tt = time() - t0
print("time with tohdf5: %.2f (%.2f GB/s)" % (tt, dsize / tt))
#print(repr(ct))