diff --git a/qhp-download-futures.py b/qhp-download-futures.py index 92f0d49..e7dabbb 100644 --- a/qhp-download-futures.py +++ b/qhp-download-futures.py @@ -27,7 +27,7 @@ def timeframe_to_seconds(tf): raise ValueError('Invalid value') class BarAggregator: - def __init__(self, timeframe): + def __init__(self, timeframe, tz): self.open_ = 0 self.high = 0 self.low = 0 @@ -36,7 +36,17 @@ class BarAggregator: self.timestamp = None self.current_bar_number = None self.timeframe = timeframe + self.tz = tz + def clear(self): + self.open_ = 0 + self.high = 0 + self.low = 0 + self.close = 0 + self.volume = 0 + self.timestamp = None + self.current_bar_number = None + def push_bar(self, timestamp, open_, high, low, close, volume): bar_number = timestamp.timestamp() // self.timeframe if bar_number != self.current_bar_number: @@ -46,7 +56,7 @@ class BarAggregator: b_close = self.close b_volume = self.volume if self.current_bar_number is not None: - b_timestamp = datetime.datetime.fromtimestamp(self.current_bar_number * self.timeframe) + b_timestamp = datetime.datetime.fromtimestamp(self.current_bar_number * self.timeframe, self.tz) self.open_ = open_ self.high = high @@ -67,16 +77,19 @@ class BarAggregator: return None def get_bar(self): + if self.timestamp is None: + return None b_open = self.open_ b_high = self.high b_low = self.low b_close = self.close b_volume = self.volume - b_timestamp = datetime.datetime.fromtimestamp(self.timeframe * ( self.timestamp.timestamp() // self.timeframe)) + b_timestamp = datetime.datetime.fromtimestamp(self.timeframe * ( self.timestamp.timestamp() // self.timeframe), self.tz) return (b_timestamp, b_open, b_high, b_low, b_close, b_volume) def get_data(qhp, ticker, start_time, end_time, period, tz, timedelta): + utctz = dateutil.tz.gettz('UTC') rq = { "ticker" : ticker, "from" : start_time.strftime("%Y-%m-%dT%H:%M:%S"), @@ -105,16 +118,29 @@ def get_data(qhp, ticker, start_time, end_time, period, tz, timedelta): low = float(line[3]) close = float(line[4]) volume = int(line[5]) - dt = datetime.datetime.fromtimestamp(timestamp, tz) + timedelta + dt = datetime.datetime.fromtimestamp(timestamp, utctz) + timedelta + dt = dt.astimezone(utctz) bar_count += 1 result.append((dt, open_, high, low, close, volume)) return result -def write_to_file(writer, bars, ticker, period): +def write_to_file(writer, bars, ticker, period, agg): for bar in bars: - writer.writerow([ticker, period, bar[0].strftime("%Y%m%d"), bar[0].strftime("%H%M%S"), bar[1], bar[2], bar[3], bar[4], bar[5]]) + if agg is not None: + mbar = agg.push_bar(bar[0], bar[1], bar[2], bar[3], bar[4], bar[5]) + if mbar is not None: + writer.writerow([ticker, agg.timeframe, mbar[0].strftime("%Y%m%d"), mbar[0].strftime("%H%M%S"), mbar[1], mbar[2], mbar[3], mbar[4], mbar[5]]) + else: + writer.writerow([ticker, period, bar[0].strftime("%Y%m%d"), bar[0].strftime("%H%M%S"), bar[1], bar[2], bar[3], bar[4], bar[5]]) + if agg is not None: + mbar = agg.get_bar() + if mbar is not None: + writer.writerow([ticker, agg.timeframe, mbar[0].strftime("%Y%m%d"), mbar[0].strftime("%H%M%S"), mbar[1], mbar[2], mbar[3], mbar[4], mbar[5]]) + agg.clear() + else: + print('none bar' ,ticker ) def make_tickers_list(base, start_time, end_time, futures_interval): result = [] @@ -148,6 +174,7 @@ def main(): parser.add_argument('-i', '--futures-interval', action='store', dest='futures_interval', help='Futures interval between exprations in month', required=True) parser.add_argument('-s', '--stitch-delta', action='store', dest='stitch_delta', help='Futures interval between exprations in month', required=True) parser.add_argument('-e', '--replace-ticker', action='store', dest='replace_ticker', help='Replace ticker id in file', required=False) + parser.add_argument('-z', '--timezone', action='store', dest='timezone', help='Convert bar timestamps to given timezone', required=False) args = parser.parse_args() @@ -162,32 +189,53 @@ def main(): start_time = datetime.datetime.strptime(args.from_, "%Y%m%d") end_time = datetime.datetime.strptime(args.to, "%Y%m%d") + if filename == "!": + if args.rescale is not None: + filename = "{}_{}_{}_{}.csv".format(symbol, args.from_, args.to, args.rescale) + else: + filename = "{}_{}_{}_{}.csv".format(symbol, args.from_, args.to, args.period) + + print("Assuming filename: {}".format(filename)) + timedelta = datetime.timedelta() if args.time_delta: timedelta = datetime.timedelta(seconds=int(args.time_delta)) + tz = dateutil.tz.gettz('UTC') + if args.timezone is not None: + tz = dateutil.tz.gettz(args.timezone) + delta = int(args.stitch_delta) agg = None if args.rescale: - agg = BarAggregator(int(args.rescale)) + agg = BarAggregator(int(args.rescale), tz) data = {} tickers = make_tickers_list(symbol, start_time, end_time, int(args.futures_interval)) print("Tickers: {}".format(tickers)) + empty_tickers = [] for ticker in tickers: print("Requesting data: {}".format(ticker)) - bars = get_data(s, ticker, start_time, end_time, period, dateutil.tz.gettz('UTC'), timedelta) + bars = get_data(s, ticker, start_time, end_time, period, tz, timedelta) if len(bars) > 0: data[ticker] = { 'bars' : bars } - print("Cutting off trailing data: {}".format(ticker)) - end_date = data[ticker]['bars'][-1][0] + else: + empty_tickers.append(ticker) + + tickers = [t for t in tickers if not (t in empty_tickers)] + + for ticker in tickers: + print("Cutting off trailing data: {}".format(ticker)) + end_date = data[ticker]['bars'][-1][0].date() + if ticker != tickers[-1]: cutoff_date = datetime.date.fromordinal(end_date.toordinal() - delta) - #cutoff_date_num = cutoff_date.year * 10000 + cutoff_date.month * 100 + cutoff_date.day + else: + cutoff_date = datetime.date.fromordinal(end_date.toordinal()) - data[ticker]['bars'] = [s for s in data[ticker]['bars'] if s[0].date() <= cutoff_date] - data[ticker]['end_date'] = cutoff_date + data[ticker]['bars'] = [s for s in data[ticker]['bars'] if s[0].date() <= cutoff_date] + data[ticker]['end_date'] = cutoff_date prev_ticker = None for k, v in sorted(data.items(), key=lambda x: x[1]['end_date']): @@ -195,16 +243,17 @@ def main(): if prev_ticker is not None: start_date = data[prev_ticker]['bars'][-1][0] v['bars'] = [s for s in data[k]['bars'] if s[0] > start_date] + print("{} : {}".format(data[prev_ticker]['bars'][-1][0], data[k]['bars'][0][0]), v['bars'][0][0]) prev_ticker = k - with open(args.output_file, 'w+') as f: + with open(filename, 'w+') as f: writer = csv.writer(f) writer.writerow(['', '', '', '