From: kevin l. <lke...@ya...> - 2005-01-24 08:33:15
|
I thought I'd provide this too. Three things are different: nrows is now 150000; I changed the query from where(r['H1']<3) to where(r['H1']>3) (notice the radical change in the indexed query time); and I also included the numarray "where" function for comparison, via H = table['H1']; print where(H>3). The script is also below.

Nrows----> 150000*20cols
Filters----> 'None'
Time for standard query--> 0.18799996376
Time for inkernel query--> 0.139999866486
Time for indexed query--> 2.14100003242
Time for numarray--> 0.0310001373291

Nrows----> 150000*20cols
Filters----> 'None'
Time for standard query--> 0.18799996376
Time for inkernel query--> 0.155999898911
Time for indexed query--> 2.17200016975
Time for numarray--> 0.0309998989105

Nrows----> 150000*20cols
Filters----> 'ZLIB'
Time for standard query--> 0.25
Time for inkernel query--> 0.219000101089
Time for indexed query--> 2.45300006866
Time for numarray--> 0.0929999351501

Nrows----> 150000*20cols
Filters----> 'LZO'
Time for standard query--> 0.203999996185
Time for inkernel query--> 0.18700003624
Time for indexed query--> 2.28099989891
Time for numarray--> 0.0629999637604

Nrows----> 150000*20cols
Filters----> 'UCL'
Time for standard query--> 0.234999895096
Time for inkernel query--> 0.203000068665
Time for indexed query--> 2.32799983025
Time for numarray--> 0.0620000362396

Running 'C:\H5\klester.py' ...
C:\Python24\lib\site-packages\tables\Leaf.py:90: UserWarning: zlib compression library is not available. Using zlib instead!. 
warnings.warn( \ Nrows----> 150000*20cols Filters----> 'None' Time for standard query--> 0.18799996376 Time for inkernel query--> 0.156000137329 Time for indexed query--> 2.17199993134 from tables import * from numarray import * class ES(IsDescription): H1 = UInt8Col(indexed=1) H2 = UInt8Col(indexed=0) M1 = Int8Col(indexed=1) S1 = Int8Col(indexed=1) BS = Int16Col(indexed=1) BP = Float32Col(indexed=1) AP = Float32Col(indexed=1) AS = Int16Col(indexed=1) L1 = Float32Col(indexed=1) V1 = Int16Col(indexed=1) A1 = Int8Col(indexed=1) UD = BoolCol(indexed=1) TV = Int64Col(indexed=1) DT = Float32Col(indexed=1) HI = Float32Col(indexed=1) LO = Float32Col(indexed=1) def create(): nrows = 150000 dat = arange(nrows*20, shape=(nrows,20), type=UInt8) date = 'd01_04_05' idxdate = 'd050104' length = (len(dat))+20 filt = Filters(complevel=1,complib='ucl',shuffle=1,fletcher32=0) file = openFile("/H5/ES_DATA139.h5",mode="w",title="ES_DATA_FILE",filters=None) root = file.root group1 = file.createGroup("/", 'd050104', 'd01_04_05') table1 = file.createTable(group1,'raw',ES,"RAW", expectedrows=length) # ATTRIBUTES--------- g1 = file.root.d050104 g1._v_attrs.date = date g1._v_attrs.idxdate = idxdate t1 = file.root.d050104.raw t1.attrs.date = date t1.attrs.idxdate = idxdate eS = table1.row for i in xrange(len(dat)): eS['H1'] = dat[i][13] eS['H2'] = dat[i][13] eS['M1'] = dat[i][14] eS['S1'] = int(dat[i][15]) eS['BS'] = dat[i][0] eS['BP'] = dat[i][1] eS['AP'] = dat[i][2] eS['AS'] = dat[i][3] eS['L1'] = dat[i][4] eS['V1'] = dat[i][5] eS['UD'] = dat[i][12] eS['A1'] = dat[i][11] eS['TV'] = dat[i][8] eS['DT'] = dat[i][16] eS['HI'] = dat[i][6] eS['LO'] = dat[i][7] eS.append() table1.flush() file.close() def select(): from time import time print 'Nrows----> 150000*20cols' print "Filters----> 'None'" file = openFile("/H5/ES_DATA139.h5") table = file.root.d050104.raw t1=time() results = [r["H1"] for r in table if r['H1']>3] print "Time for standard query-->", time()-t1 t1=time() results = [r["H2"] for 
r in table.where(table.cols.H2>3)] print "Time for inkernel query-->", time()-t1 t1=time() results = [r["H1"] for r in table.where(table.cols.H1>3)] print "Time for indexed query-->", time()-t1 t1 = time() h = table['H1'] results = where(h>3)[0] print 'Time for numarray-->', time()-t1 file.close() if __name__ == '__main__': create() select() __________________________________ Do you Yahoo!? Yahoo! Mail - Easier than ever with enhanced search. Learn more. http://info.mail.yahoo.com/mail_250 |