# column_iter.py (1.9 KB)
  1. import bcolz
  2. import numpy
  3. from .bench_helper import ctime
  4. N = 1000 * 1000
  5. ct = bcolz.fromiter(((i, i * i, i * i * i)
  6. for i in xrange(N)), dtype='i8,i8,i8', count=N)
  7. b = numpy.array(numpy.arange(N) % 2, dtype="bool")
  8. c = bcolz.carray(b)
  9. sorted_index = range(1, N, 2)
  10. class Suite:
  11. def time_tolist(self):
  12. return (ct['f0'][sorted_index]).tolist()
  13. def time_where_01(self):
  14. return [x.f0 for x in ct.where(b)]
  15. def time_where_02(self):
  16. return [x.f0 for x in ct.where(c)]
  17. def time_where_03(self):
  18. return [x for x in ct['f0'].where(b)]
  19. def time_where_04(self):
  20. return [x for x in ct['f0'].where(c)]
  21. def time_sum_01(self):
  22. return sum([x for x in ct['f0'].where(c)])
  23. def time_sum_02(self):
  24. return sum(x for x in ct['f0'].where(c))
  25. def time_sum_03(self):
  26. return bcolz.fromiter((x for x in ct['f0'].where(c)),
  27. dtype=ct['f0'].dtype, count=c.wheretrue().sum()).sum()
  28. def time_sum_na_01(self):
  29. # sum with no NA's
  30. # x==x check to leave out NA values
  31. return sum([x for x in ct['f0'].where(c) if x == x])
  32. def time_sum_na_02(self):
  33. # sum with no NA's
  34. # x==x check to leave out NA values
  35. return sum((x for x in ct['f0'].where(c) if x == x))
  36. if __name__ == '__main__':
  37. s = Suite()
  38. with ctime("time_sum_01"):
  39. s.time_sum_01()
  40. with ctime("time_sum_02"):
  41. s.time_sum_02()
  42. with ctime("time_sum_03"):
  43. s.time_sum_03()
  44. with ctime("time_sum_na_01"):
  45. s.time_sum_na_01()
  46. with ctime("time_sum_na_02"):
  47. s.time_sum_na_02()
  48. with ctime("time_tolist"):
  49. s.time_tolist()
  50. with ctime("time_where_01"):
  51. s.time_where_01()
  52. with ctime("time_where_02"):
  53. s.time_where_02()
  54. with ctime("time_where_03"):
  55. s.time_where_03()
  56. with ctime("time_where_04"):
  57. s.time_where_04()