123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869 |
from __future__ import print_function

import contextlib
import sys
import time
import timeit

import bcolz
import numpy
# True when running under a Python 2 interpreter.
PY2 = sys.version_info[0] == 2

if not PY2:
    # Python 3 compatibility shim: give the script Python 2 semantics, where
    # ``xrange`` is the lazy sequence and ``range`` returns a real list.
    xrange = range

    def range(*args):
        """Return ``list(xrange(*args))``, i.e. a materialised list."""
        return list(xrange(*args))
@contextlib.contextmanager
def ctime(label=""):
    """Print the time spent inside the ``with`` body.

    Args:
        label: Text printed in front of the elapsed time (default ``""``).

    On normal exit this prints ``label``, the elapsed seconds rounded to
    three decimals, and the literal ``"sec"``. Nothing is printed when the
    body raises, because the exception propagates out of the ``yield``.
    """
    # timeit.default_timer is available on both Python 2 and 3 and is the
    # clock the standard library itself uses for benchmarking.
    start = timeit.default_timer()
    yield
    print(label, round(timeit.default_timer() - start, 3), "sec")
# Number of rows in the benchmark table.
N = 1000 * 1000

# Three-column int64 ctable whose row i holds (i, i**2, i**3).
ct = bcolz.fromiter(((i, i*i, i*i*i) for i in xrange(N)),
                    dtype='i8,i8,i8', count=N)

# Boolean mask that is True at every odd position, as a NumPy array (b)
# and as a bcolz carray built from it (c).
b = numpy.array(numpy.arange(N) % 2, dtype="bool")
c = bcolz.carray(b)

# The same selection expressed as an explicit list of the odd positions.
sorted_index = range(1, N, 2)
# Reference result: index column f0 with the explicit list of positions.
with ctime():
    r0 = (ct['f0'][sorted_index]).tolist()

# ctable.where() driven by the NumPy boolean mask; take f0 from each row.
with ctime():
    r1 = [x.f0 for x in ct.where(b)]
assert r0 == r1

# ctable.where() driven by the carray boolean mask.
with ctime():
    r2 = [x.f0 for x in ct.where(c)]
assert r0 == r2

# where() on the f0 column alone, NumPy mask.
with ctime():
    r3 = [x for x in ct['f0'].where(b)]
assert r0 == r3

# where() on the f0 column alone, carray mask.
with ctime():
    r4 = [x for x in ct['f0'].where(c)]
assert r0 == r4
# sum
# Compare three ways of summing the selected f0 values: builtin sum() over
# a list, builtin sum() over a generator, and the sum() method of a carray
# built from the selected values.
with ctime("sum list"):
    r5 = sum([x for x in ct['f0'].where(c)])
with ctime("sum generator"):
    r6 = sum(x for x in ct['f0'].where(c))
assert r5 == r6
with ctime("sum method"):
    # NOTE(review): count is presumably the number of True entries in c,
    # i.e. the number of values the generator yields -- confirm.
    r7 = bcolz.fromiter((x for x in ct['f0'].where(c)),
                        dtype=ct['f0'].dtype,
                        count=c.wheretrue().sum()).sum()
assert r7 == r5
# sum with no NA's
# The ``x == x`` filter drops NA (NaN) values, since NaN is the only value
# that does not compare equal to itself.
with ctime("sum with no NA (list)"):
    r8 = sum([x for x in ct['f0'].where(c) if x == x])  # x==x check to leave out NA values
with ctime("sum with no NA (generator)"):
    r9 = sum((x for x in ct['f0'].where(c) if x == x))  # x==x check to leave out NA values
# Both forms evaluate the same filtered sequence, so they must agree
# (mirrors the list-vs-generator check done for the plain sums).
assert r8 == r9
|