query.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. # Benchmark to compare the times for evaluating queries.
  2. # Numexpr is needed in order to execute this.
  3. import math
  4. from time import time
  5. import numpy as np
  6. import bcolz
  7. N = 1e7 # the number of elements in x
  8. clevel = 5 # the compression level
  9. cname = "blosclz" # the compressor name
  10. sexpr = "(x+1)<10" # small number of items
  11. # sexpr = "(x+1)<1000000" # large number
  12. sexpr = "(2*x*x*x+.3*y**2+z+1)<10" # small number
  13. #sexpr = "(2*x*x*x+.3*y**2+z+1)<1e15" # medium number
  14. #sexpr = "(2*x*x*x+.3*y**2+z+1)<1e20" # large number
  15. print("Creating inputs...")
  16. cparams = bcolz.cparams(clevel=clevel, cname=cname)
  17. x = np.arange(N)
  18. cx = bcolz.carray(x, cparams=cparams)
  19. if 'y' not in sexpr:
  20. t = bcolz.ctable((cx,), names=['x'])
  21. else:
  22. y = np.arange(N)
  23. z = np.arange(N)
  24. cy = bcolz.carray(y, cparams=cparams)
  25. cz = bcolz.carray(z, cparams=cparams)
  26. t = bcolz.ctable((cx, cy, cz), names=['x', 'y', 'z'])
  27. nt = t[:]
  28. print("Querying '%s' with 10^%d points" % (sexpr, int(math.log10(N))))
  29. t0 = time()
  30. out = [r for r in x[eval(sexpr)]]
  31. print("Time for numpy--> %.3f" % (time() - t0,))
  32. t0 = time()
  33. out = [r for r in t[eval(sexpr)]]
  34. print("Time for structured array--> %.3f" % (time() - t0,))
  35. t0 = time()
  36. out = [r for r in cx[sexpr]]
  37. print("Time for carray --> %.3f" % (time() - t0,))
  38. # Uncomment the next for disabling threading
  39. #ne.set_num_threads(1)
  40. #bcolz.blosc_set_num_threads(1)
  41. # Seems that this works better if we dividw the number of cores by 2.
  42. # Maybe due to some contention between Numexpr and Blosc?
  43. #bcolz.set_num_threads(bcolz.ncores//2)
  44. t0 = time()
  45. #cout = t[t.eval(sexpr, cparams=cparams)]
  46. cout = [r for r in t.where(sexpr)]
  47. #cout = [r['x'] for r in t.where(sexpr)]
  48. #cout = [r['y'] for r in t.where(sexpr, colnames=['x', 'y'])]
  49. print("Time for ctable--> %.3f" % (time() - t0,))
  50. print("cout-->", len(cout), cout[:10])
  51. #assert_array_equal(out, cout, "Arrays are not equal")