From: Aquil H. A. <aqu...@gm...> - 2012-12-14 06:54:17
|
Hello All, I currently use PyTables to generate a dataset that is indexed by a timestamp and a symbol. The problem that I have is that the data is stored at irregular intervals. For example: *# See below for method ts_from_str* data=[{'text_ts':'2012-01-04T15:00:00Z', 'symbol':'APPL', 'price':689.00, 'timestamp':ts_from_str('2012-01-04T15:00:00Z')}, {'text_ts':'2012-01-04T15:11:00Z', 'symbol':'APPL', 'price':687.24, 'timestamp':ts_from_str('2012-01-04T15:11:00Z')}, {'text_ts':'2012-01-05T15:33:00Z', 'symbol':'APPL', 'price':688.32, 'timestamp':ts_from_str('2012-01-05T15:33:00Z')}, {'text_ts':'2012-01-04T15:01:00Z', 'symbol':'MSFT', 'price':32.30, 'timestamp':ts_from_str('2012-01-04T15:01:00Z')}, {'text_ts':'2012-01-04T16:00:00Z', 'symbol':'MSFT', 'price':36.44, 'timestamp':ts_from_str('2012-01-04T16:00:00Z')}, {'text_ts':'2012-01-05T15:19:00Z', 'symbol':'MSFT', 'price':35.89, 'timestamp':ts_from_str('2012-01-05T15:19:00Z')}] If I want to look up the price for Apple on for January 4, 2012 at 15:01:00, I will get an empty ndarry. *Is there a way to optimize the search for data "asof" a specific time other than iterating until you find data?* I've written my own price_asof method (See code below), that produces the following output. *In [63]: price_asof(dt,'APPL')* *QUERY: (timestamp == 1325707380) & (symbol == "APPL") -- text_ts: 2012-01-04T15:03:00Z* *QUERY: (timestamp == 1325707320) & (symbol == "APPL") -- text_ts: 2012-01-04T15:02:00Z* *QUERY: (timestamp == 1325707260) & (symbol == "APPL") -- text_ts: 2012-01-04T15:01:00Z* *QUERY: (timestamp == 1325707200) & (symbol == "APPL") -- text_ts: 2012-01-04T15:00:00Z* *Out[63]: * *array([(689.0, 'APPL', '2012-01-04T15:00:00Z', 1325707200)], * * dtype=[('price', '<f8'), ('symbol', 'S16'), ('text_ts', 'S26'), ('timestamp', '<i4')])* *# Code to generate data* import tables from datetime import datetime, timedelta from time import mktime import numpy as np def ts_from_str(ts_str, ts_format='%Y-%m-%dT%H:%M:%SZ'): """ Create a Unix Timestamp from an ISO 8601 timestamp string """ dt = datetime.strptime(ts_str, ts_format) return mktime(dt.timetuple()) class PriceData(tables.IsDescription): text_ts = tables.StringCol(len('2012-01-01T00:00:00+00:00 ')) symbol = tables.StringCol(16) price = tables.Float64Col() timestamp = tables.Time32Col() h5f = tables.openFile('test.h5','w', title='Price Data For Apple and Microsoft') group = h5f.createGroup('/','January', 'January Price Data') tbl = h5f.createTable(group, 'Prices',PriceData,'Apple and Microsoft Prices') data=[{'text_ts':'2012-01-04T15:00:00Z', 'symbol':'APPL', 'price':689.00, 'timestamp':ts_from_str('2012-01-04T15:00:00Z')}, {'text_ts':'2012-01-04T15:11:00Z', 'symbol':'APPL', 'price':687.24, 'timestamp':ts_from_str('2012-01-04T15:11:00Z')}, {'text_ts':'2012-01-05T15:33:00Z', 'symbol':'APPL', 'price':688.32, 'timestamp':ts_from_str('2012-01-05T15:33:00Z')}, {'text_ts':'2012-01-04T15:01:00Z', 'symbol':'MSFT', 'price':32.30, 'timestamp':ts_from_str('2012-01-04T15:01:00Z')}, {'text_ts':'2012-01-04T16:00:00Z', 'symbol':'MSFT', 'price':36.44, 'timestamp':ts_from_str('2012-01-04T16:00:00Z')}, {'text_ts':'2012-01-05T15:19:00Z', 'symbol':'MSFT', 'price':35.89, 'timestamp':ts_from_str('2012-01-05T15:19:00Z')}] price_data = tbl.row for d in data: price_data['text_ts'] = d['text_ts'] price_data['symbol'] = d['symbol'] price_data['price'] = d['price'] price_data['timestamp'] = d['timestamp'] price_data.append() tbl.flush() *# This is my price_asof function* def price_asof(dt, symbol, max_rec=1000): """ Return the price of the time dt """ ts = mktime(dt.timetuple()) query = '(timestamp == %d)' % ts if symbol: query += ' & (symbol == "%s")' % symbol data = np.ndarray(0) count = 0 while (not data) and (count <= max_rec): # print "QUERY: %s -- text_ts: %s" % (query, dt.strftime('%Y-%m-%dT%H:%M:%SZ')) data = tbl.readWhere(query) dt = dt-timedelta(seconds=60) ts = mktime(dt.timetuple()) query = '(timestamp == %d)' % ts if symbol: query += ' & (symbol == "%s")' % symbol count += 1 return data h5f.close() -- Aquil H. Abdullah aqu...@gm... |