Getting Binance Historical Data For Specific TimeZone - bitcoin

I found this Python script on the web; it fetches OHLCV historical data from the Binance API for the requested dates, assets and time intervals. The script currently returns the data in UTC.
I want to modify it so it returns the data (daily/hourly) in a specified timezone. I guess it only takes changing one function or adding an argument, but I can't manage to do it correctly.
How can I change it so it returns data for UTC+2 (or any other timezone)?
import time
import dateparser
import pytz
import os
from datetime import datetime
import binance
print(binance.__file__)
from binance.client import Client
import time
import pandas as pd
def date_to_milliseconds(date_str):
    """Convert UTC date to milliseconds.

    If using offset strings add "UTC" to date string e.g. "now UTC", "11 hours ago UTC"
    See dateparser docs for formats http://dateparser.readthedocs.io/en/latest/

    :param date_str: date in readable format, i.e. "January 01, 2018", "11 hours ago UTC", "now UTC"
    :type date_str: str
    """
    # get epoch value in UTC
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    # parse our date string
    d = dateparser.parse(date_str)
    # if the date is not timezone aware apply UTC timezone
    if d.tzinfo is None or d.tzinfo.utcoffset(d) is None:
        d = d.replace(tzinfo=pytz.utc)
    # return the difference in time
    return int((d - epoch).total_seconds() * 1000.0)
def interval_to_milliseconds(interval):
    """Convert a Binance interval string to milliseconds

    :param interval: Binance interval string 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w
    :type interval: str

    :return:
        None if unit not one of m, h, d or w
        None if string not in correct format
        int value of interval in milliseconds
    """
    ms = None
    seconds_per_unit = {
        "m": 60,
        "h": 60 * 60,
        "d": 24 * 60 * 60,
        "w": 7 * 24 * 60 * 60
    }
    unit = interval[-1]
    if unit in seconds_per_unit:
        try:
            ms = int(interval[:-1]) * seconds_per_unit[unit] * 1000
        except ValueError:
            pass
    return ms
def GetUpdateData(kline):
    # note: the open time is formatted from UTC (time.gmtime), while the close
    # time below uses the machine's local timezone (time.localtime)
    Time = time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime(kline[0] / 1000))
    Open = kline[1]
    High = kline[2]
    Low = kline[3]
    Close = kline[4]
    Volume = kline[5]
    Close_time = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(kline[6] / 1000))
    Quote_asset_volume = kline[7]
    Number_of_trades = kline[8]
    Taker_buy_base_asset_volume = kline[9]
    Taker_buy_quote_asset_volume = kline[10]
    return Time, Open, High, Low, Close, Volume, Close_time, Quote_asset_volume, Number_of_trades, Taker_buy_base_asset_volume, Taker_buy_quote_asset_volume
def get_historical_klines(symbol, interval, start_str, end_str=None):
    """Get Historical Klines from Binance

    See dateparser docs for valid start and end string formats http://dateparser.readthedocs.io/en/latest/
    If using offset strings for dates add "UTC" to date string e.g. "now UTC", "11 hours ago UTC"

    :param symbol: Name of symbol pair e.g BNBBTC
    :type symbol: str
    :param interval: Binance Kline interval
    :type interval: str
    :param start_str: Start date string in UTC format
    :type start_str: str
    :param end_str: optional - end date string in UTC format
    :type end_str: str

    :return: list of OHLCV values
    """
    # create the Binance client, no need for api key
    client = Client("", "")
    # init our list
    output_data = []
    # setup the max limit
    limit = 500
    # convert interval to useful value in milliseconds
    timeframe = interval_to_milliseconds(interval)
    # convert our date strings to milliseconds
    start_ts = date_to_milliseconds(start_str)
    # if an end time was passed convert it
    end_ts = None
    if end_str:
        end_ts = date_to_milliseconds(end_str)

    idx = 0
    # it can be difficult to know when a symbol was listed on Binance so allow start time to be before list date
    symbol_existed = False
    while True:
        # fetch the klines from start_ts up to max 500 entries or the end_ts if set
        temp_data = client.get_klines(
            symbol=symbol,
            interval=interval,
            limit=limit,
            startTime=start_ts,
            endTime=end_ts
        )

        # handle the case where our start date is before the symbol pair listed on Binance
        if not symbol_existed and len(temp_data):
            symbol_existed = True

        if symbol_existed:
            # append this loop's data to our output data
            output_data += temp_data
            # update our start timestamp using the last value in the array and add the interval timeframe
            start_ts = temp_data[len(temp_data) - 1][0] + timeframe
        else:
            # it wasn't listed yet, increment our start date
            start_ts += timeframe

        idx += 1
        # check if we received less than the required limit and exit the loop
        if len(temp_data) < limit:
            # exit the while loop
            break

        # sleep after every 3rd call to be kind to the API
        if idx % 3 == 0:
            time.sleep(1)

    return output_data
start = "01 January, 2017"
end = "01 February, 2017"
symbols = ['ETHBTC']
interval = '1d'#Client.KLINE_INTERVAL_15MIN
for symbol in symbols:
klines = get_historical_klines(symbol, interval, start, end)
times = []
Opens = []
Highs = []
Lows = []
Closes = []
Volumes = []
Close_times = []
Quote_asset_volumes = []
Number_of_tradess = []
Taker_buy_base_asset_volumes = []
Taker_buy_quote_asset_volumes = []
for k in klines:
Time,Open,High,Low,Close,Volume,Close_time,Quote_asset_volume,Number_of_trades,Taker_buy_base_asset_volume,Taker_buy_quote_asset_volume = GetUpdateData(k)
times.append(Time)
Opens.append(Open)
Highs.append(High)
Lows.append(Low)
Closes.append(Close)
Volumes.append(Volume)
Close_times.append(Close_time)
Quote_asset_volumes.append(Quote_asset_volume)
Number_of_tradess.append(Number_of_trades)
Taker_buy_base_asset_volumes.append(Taker_buy_base_asset_volume)
Taker_buy_quote_asset_volumes.append(Taker_buy_quote_asset_volume)
DataStruct = pd.DataFrame()
DataStruct['time'] = times
DataStruct['Open'] = Opens
DataStruct['High'] = Highs
DataStruct['Low'] = Lows
DataStruct['Close'] = Closes
DataStruct['Volume'] = Volumes
DataStruct['Close_time'] = Close_times
DataStruct['Quote_asset_volume'] = Quote_asset_volumes
DataStruct['Number_of_trades'] = Number_of_tradess
DataStruct['Taker_buy_base_asset_volume'] = Taker_buy_base_asset_volumes
DataStruct['Taker_buy_quote_asset_volume'] = Taker_buy_quote_asset_volumes
FileName = symbol+ '_' + start+ '_' + end + ' .csv'
FileName = FileName.replace(' ','_')
FileName = FileName.replace(',','')
Path2Save = os.path.normpath(r'')
SaveStrFile = os.path.normpath(Path2Save+ '\\' +FileName)
#save FeatureWeights to CSV file
D_S_header = ['time','Open','High','Low','Close','Volume','Close_time','Quote_asset_volume','Number_of_trades','Taker_buy_base_asset_volume','Taker_buy_quote_asset_volume']
DataStruct.to_csv(path_or_buf = SaveStrFile, header = D_S_header )

In these lines you can see where the timezone is applied:
# get epoch value in UTC
epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
Just redefine the timezone there (and in the d = d.replace(tzinfo=pytz.utc) line that is applied to naive input dates). For a list of the timezones supported by pytz you can use pytz.all_timezones.
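For example, here is a minimal sketch of the output side. The kline timestamps Binance returns are epoch milliseconds in UTC; the helper name below and the fixed-offset zone "Etc/GMT-2" (pytz's name for UTC+2, note the reversed sign in the Etc/ names) are my own choices, not part of the original script:
import pandas as pd
import pytz

def kline_ms_to_tz(ms, tz_name="Etc/GMT-2"):
    """Turn a Binance epoch-millisecond timestamp into a tz-aware timestamp."""
    ts = pd.to_datetime(ms, unit="ms", utc=True)   # Binance timestamps are UTC
    return ts.tz_convert(pytz.timezone(tz_name))   # shift the display to UTC+2

print(kline_ms_to_tz(1483228800000))  # 2017-01-01 00:00 UTC -> 2017-01-01 02:00+02:00
The same idea applies inside GetUpdateData: replace the time.gmtime/time.localtime formatting with one explicit conversion to the timezone you want, so the daily/hourly rows are all labelled consistently.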

Related

Converting pandas._libs.tslibs.timestamps.Timestamp to seconds since midnight?

I have a pandas._libs.tslibs.timestamps.Timestamp object, e.g., 2016-01-01 07:00:04.85+00:00 and I want to create an int object that stores the number of seconds since the previous midnight.
In the above example, it would return 7 * 3600 + 0 * 60 + 4.85 = 25204.85
Is there a quick way to do this in pandas?
You can use normalize() to subtract the date part:
# ts = pd.to_datetime('2016-01-01 07:00:04.85+00:00')
>>> (ts - ts.normalize()).total_seconds()
25204.85
It also works with DataFrame through dt accessor:
# df = pd.DataFrame({'date': [ts]})
>>> (df['date'] - df['date'].dt.normalize()).dt.total_seconds()
0    25204.85
Name: date, dtype: float64
Not sure if this is what you are looking for but here is an implementation:
import pandas as pd

def seconds_from_midnight(date):
    return date.hour * 3600 + date.minute * 60 + date.second + date.microsecond / 1000000

date = pd.Timestamp.now()
print(date)
print(seconds_from_midnight(date))

Extract PI OSIsoft Monthly Interval in Python

I am trying to extract the sum of PI data from OSIsoft 10m (10-minute) data in one (1) month intervals using Python pandas. However, I get an error from either OSIsoft or Python when I set the interval notation to "M" (for OSIsoft) or "1mo" (for Python); neither notation works without an error. I have a function that pulls an interval of data to plot and save, and it works for intervals such as "1d", "30d", "1w" and "1y", but I cannot get the sum of data for each one-month interval. Is it a conflict between Python, which describes "month" with an "M", and OSIsoft, which requires "1mo"? Thank you. Here is my code:
def get_tag_history2(tagname, starttime, endtime, interval="10m"):
    # pull historical data
    tag = PIPoint.FindPIPoint(piServer, tagname)
    # name = tag.Name.lower()
    timerange = AFTimeRange(starttime, endtime)
    span = AFTimeSpan.Parse(interval)
    # summary values
    summaries = tag.Summaries(timerange, span, AFSummaryTypes.Average, AFCalculationBasis.TimeWeighted, AFTimestampCalculation.Auto)
    recordedValuesDict = dict()
    for summary in summaries:
        for event in summary.Value:
            dt = datetime.strptime(
                event.Timestamp.LocalTime.ToString(), '%m/%d/%Y %I:%M:%S %p')
            recordedValuesDict[dt] = event.Value
    # turn dictionary into pd.DataFrame
    df = pd.DataFrame(
        recordedValuesDict.items(), columns=['TimeStamp', 'Value'])
    # send it to a datetime index, then set the index
    df['TimeStamp'] = pd.to_datetime(df['TimeStamp']) + pd.Timedelta(interval)
    df.set_index(['TimeStamp'], inplace=True)
    return df
if __name__ == '__main__':
    """
    Set inputs
    """
    pitags = ['JC1.WF.DOMINA.ProdEffective', 'HO1.WF.DOMINA.ProdEffective', 'BC1.WF.DOMINA.ProdEffective']
    start_time = '2020-01-01 00:00'
    end_time = '2022-01-01 00:00'
    interval = "M"
    """
    Run Script
    """
    connect_to_Server('PDXPI01')
    output = pd.DataFrame()
    for tag in pitags:
        values = get_tag_history2(
            tag, start_time, end_time, interval=interval)
        output[tag] = values['Value']
    for i, col in enumerate(output.columns):
        output[col].plot(fig=plt.figure(i))
        plt.title(col)
    plt.show()
The error when using interval = "1mo" is --- >
ValueError: invalid unit abbreviation: mo
The error when using interval = "M" is --- >
FormatException: The 'M' token in the string 'M' was not expected.
at OSIsoft.AF.Time.AFTimeSpan.FormatError(String input, Char token, Boolean throwErrors, AFTimeSpan& result)
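For what it's worth, the two errors come from different libraries: "1mo" is rejected by pd.Timedelta(interval) inside get_tag_history2 (pandas has no month unit for Timedelta, since months vary in length), while "M" is rejected by AFTimeSpan.Parse. A hedged sketch of one possible workaround, assuming AFTimeSpan does accept "1mo" for months, is to keep "1mo" for the OSIsoft side and translate it to a pandas DateOffset for the timestamp shift; interval_to_offset below is a hypothetical helper, not part of the AF SDK:
import pandas as pd

def interval_to_offset(interval):
    """Map an AF-style interval string to a pandas offset for timestamp math."""
    if interval.endswith("mo"):                        # e.g. "1mo" -> one month
        return pd.DateOffset(months=int(interval[:-2] or 1))
    return pd.Timedelta(interval)                      # "10m", "1d", "1w", ...

# inside get_tag_history2 the shift would then read (sketch):
# df['TimeStamp'] = pd.to_datetime(df['TimeStamp']) + interval_to_offset(interval)
print(pd.Timestamp('2020-01-01') + interval_to_offset('1mo'))  # -> 2020-02-01 00:00:00
print(pd.Timestamp('2020-01-01') + interval_to_offset('10m'))  # -> 2020-01-01 00:10:00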

Add a column of minutes to a datetime in pandas

I have a dataframe with a start time and the length of an operation. I'm trying to figure out how to add the length (in minutes) to the start time in order to work out the end time of the session. I've run a few different variations of the same general idea and keep getting the same error, "unsupported type for timedelta minutes component: Series". The code extract is below:
data = {'Name': ['John', 'Peter'],
        'Start': [2, 2],
        'Length': [120, 90],
        }
df = pd.DataFrame.from_records(data)
df['Start'] = pd.to_datetime(df['Start'])
df['Length'] = pd.to_datetime(df['Length'])
df["tdiffinmin"] = df['Start'].apply(lambda x: x + pd.DateOffset(minutes=df["Length"]))
I've also tried the following, among other ways of doing this math, and keep getting similar errors:
df["tdiffinmin"] = df['Start'].apply(lambda x: x - pd.DateOffset(minutes=df["Length"]))
df["tdiffinmin"] = (df['Start'] + timedelta(minutes=df["Length"])).dt.total_seconds() / 60
df['tdiffinmin'] = df['Start'] - pd.DateOffset(minutes=df["Length"])
The full code reads from a dataset (Excel sheet or CSV), populates a DataFrame, and this is some of the math I am doing. Originally it was done with start and stop times, so I know something similar is possible. In the dataset, Length is in minutes and Start is a date and time, so datetime is necessary.
You should convert Length into timedelta, not datetime:
df['Start'] = pd.to_datetime(df['Start'])
df['Length'] = pd.to_timedelta(df['Length'], unit='min')
df['tdiffinmin'] = df['Start'] + df['Length']
Output:
Length Name Start tdiffinmin
0 02:00:00 John 1970-01-01 00:00:00.000000002 1970-01-01 02:00:00.000000002
1 01:30:00 Peter 1970-01-01 00:00:00.000000002 1970-01-01 01:30:00.000000002
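The 1970 dates above are only an artifact of the sample data, since pd.to_datetime(2) treats the bare integer as 2 nanoseconds after the Unix epoch. With real start timestamps (made-up values here) the same two lines give the expected end times:
import pandas as pd

df = pd.DataFrame({'Start': ['2022-05-01 09:00', '2022-05-01 10:15'],
                   'Length': [120, 90]})              # Length in minutes
df['Start'] = pd.to_datetime(df['Start'])
df['Length'] = pd.to_timedelta(df['Length'], unit='min')
df['tdiffinmin'] = df['Start'] + df['Length']
print(df)
#                 Start          Length          tdiffinmin
# 0 2022-05-01 09:00:00 0 days 02:00:00 2022-05-01 11:00:00
# 1 2022-05-01 10:15:00 0 days 01:30:00 2022-05-01 11:45:00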

dataframe results changes to zero after adding return

I am trying to pass "buy_list" in the code below to df. This is a small section of the code; when the full code is executed I get the results of a backtest (linked image below).
initial results
replacement_stocks = portfolio_size - len(kept_positions)
buy_list = ranking_table.loc[
    ~ranking_table.index.isin(kept_positions)][:replacement_stocks]
new_portfolio = pd.concat(
    (buy_list,
     ranking_table.loc[ranking_table.index.isin(kept_positions)])
)
When I define df as below, I get a "df not defined" error:
replacement_stocks = portfolio_size - len(kept_positions)
buy_list = ranking_table.loc[
    ~ranking_table.index.isin(kept_positions)][:replacement_stocks]
new_portfolio = pd.concat(
    (buy_list,
     ranking_table.loc[ranking_table.index.isin(kept_positions)])
)
df1 = buy_list  # create df1 with buy_list
df2 = ranking_table.loc[
    ~ranking_table.index.isin(kept_positions)][:replacement_stocks]  # create df2 with buy_list
I tried the solution in the link below
Similar error with suggested fix
Following this, I still get the "df not defined" error, and the output of my backtest changes to 0% in all the months that previously had actual percentage changes, negative and positive.
replacement_stocks = portfolio_size - len(kept_positions)
buy_list = ranking_table.loc[
    ~ranking_table.index.isin(kept_positions)][:replacement_stocks]
new_portfolio = pd.concat(
    (buy_list,
     ranking_table.loc[ranking_table.index.isin(kept_positions)])
)
return buy_list
df2 = ranking_table.loc[
    ~ranking_table.index.isin(kept_positions)][:replacement_stocks]
print(df2)
This is what I now end up with
Error message
I'd appreciate any suggestions on how I can fix this.
Thanks,
Last1
Below is the full code as requested; it's from a book I am working through, Trading Evolved by Andreas Clenow.
Thanks again.
%matplotlib inline
import zipline
from zipline.api import order_target_percent, symbol, \
    set_commission, set_slippage, schedule_function, \
    date_rules, time_rules, attach_pipeline, pipeline_output
from pandas import Timestamp
import matplotlib.pyplot as plt
import pyfolio as pf
import pandas as pd
import numpy as np
from scipy import stats
from zipline.finance.commission import PerDollar
from zipline.finance.slippage import VolumeShareSlippage, FixedSlippage
from zipline_norgatedata.pipelines import NorgateDataIndexConstituent
from zipline.pipeline import Pipeline
"""
Model Settings
"""
intial_portfolio = 100000
momentum_window1 = 125
momentum_window2 = 125
minimum_momentum = 40
portfolio_size = 30
vola_window = 20
# Trend filter settings
enable_trend_filter = True
trend_filter_symbol = '$SPXTR'
trend_filter_window = 200
"""
Commission and Slippage Settings
"""
enable_commission = True
commission_pct = 0.001
enable_slippage = True
slippage_volume_limit = 0.025
slippage_impact = 0.05
"""
Helper functions.
"""
def momentum_score(ts):
    """
    Input: Price time series.
    Output: Annualized exponential regression slope,
    multiplied by the R2
    """
    # Make a list of consecutive numbers
    x = np.arange(len(ts))
    # Get logs
    log_ts = np.log(ts)
    # Calculate regression values
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, log_ts)
    # Annualize percent
    annualized_slope = (np.power(np.exp(slope), 252) - 1) * 100
    # Adjust for fitness
    score = annualized_slope * (r_value ** 2)
    return score

def volatility(ts):
    return ts.pct_change().rolling(vola_window).std().iloc[-1]
"""
Initialization and trading logic
"""
def make_pipeline():
    indexconstituent = NorgateDataIndexConstituent('$SPX')
    return Pipeline(
        columns={
            'NorgateDataIndexConstituent': indexconstituent},
        screen=indexconstituent)
def initialize(context):
    attach_pipeline(make_pipeline(), 'norgatedata_pipeline', chunks=9999, eager=True)

    # Set commission and slippage.
    if enable_commission:
        comm_model = PerDollar(cost=commission_pct)
    else:
        comm_model = PerDollar(cost=0.0)
    set_commission(comm_model)

    if enable_slippage:
        slippage_model = VolumeShareSlippage(volume_limit=slippage_volume_limit, price_impact=slippage_impact)
        set_slippage(slippage_model)
    else:
        slippage_model = FixedSlippage(spread=0.0)

    # Used only for progress output.
    context.last_month = intial_portfolio

    # Store index membership
    # context.index_members = pd.read_csv('../data/index_members/sp500.csv', index_col=0, parse_dates=[0])

    # Schedule rebalance monthly.
    schedule_function(
        func=rebalance,
        date_rule=date_rules.month_start(),
        time_rule=time_rules.market_open()
    )
def output_progress(context):
    """
    Output some performance numbers during backtest run
    """
    # Get today's date
    today = zipline.api.get_datetime().date()
    # Calculate percent difference since last month
    perf_pct = (context.portfolio.portfolio_value / context.last_month) - 1
    # Print performance, format as percent with two decimals.
    print("{} - Last Month Result: {:.2%}".format(today, perf_pct))
    # Remember today's portfolio value for next month's calculation
    context.last_month = context.portfolio.portfolio_value
def rebalance(context, data):
    # Write some progress output during the backtest
    output_progress(context)

    context.pipeline_data = pipeline_output('norgatedata_pipeline')
    todays_universe = context.pipeline_data.index

    # Check how long history window we need.
    hist_window = max(momentum_window1,
                      momentum_window2)

    # Get historical data
    hist = data.history(todays_universe, "close", hist_window, "1d")

    # Slice the history to match the two chosen time frames.
    momentum_hist1 = hist[(-1 * momentum_window1):]
    momentum_hist2 = hist[(-1 * momentum_window2):]

    # Calculate momentum values for the two time frames.
    momentum_list1 = momentum_hist1.apply(momentum_score)
    momentum_list2 = momentum_hist2.apply(momentum_score)

    # Now let's put the two momentum values together, and calculate mean.
    momentum_concat = pd.concat((momentum_list1, momentum_list2))
    mom_by_row = momentum_concat.groupby(momentum_concat.index)
    mom_means = mom_by_row.mean()

    # Sort by momentum value.
    ranking_table = mom_means.sort_values(ascending=False)

    """
    Sell Logic
    First we check if any existing position should be sold.
    * Sell if stock is no longer part of index.
    * Sell if stock has too low momentum value.
    """
    kept_positions = list(context.portfolio.positions.keys())
    for security in context.portfolio.positions:
        if (security not in todays_universe):
            order_target_percent(security, 0.0)
            kept_positions.remove(security)
        elif ranking_table[security] < minimum_momentum:
            order_target_percent(security, 0.0)
            kept_positions.remove(security)

    """
    Trend Filter Section
    """
    if enable_trend_filter:
        ind_hist = data.history(
            symbol(trend_filter_symbol),
            'close',
            trend_filter_window,
            '1d'
        )
        trend_filter = ind_hist.iloc[-1] > ind_hist.mean()
        if trend_filter == False:
            return

    """
    Stock Selection Logic
    Check how many stocks we are keeping from last month.
    Fill from top of ranking list, until we reach the
    desired total number of portfolio holdings.
    """
    replacement_stocks = portfolio_size - len(kept_positions)
    buy_list = ranking_table.loc[
        ~ranking_table.index.isin(kept_positions)][:replacement_stocks]
    new_portfolio = pd.concat(
        (buy_list,
         ranking_table.loc[ranking_table.index.isin(kept_positions)])
    )

    """
    Calculate inverse volatility for stocks,
    and make target position weights.
    """
    vola_table = hist[new_portfolio.index].apply(volatility)
    inv_vola_table = 1 / vola_table
    sum_inv_vola = np.sum(inv_vola_table)
    vola_target_weights = inv_vola_table / sum_inv_vola

    for security, rank in new_portfolio.iteritems():
        weight = vola_target_weights[security]
        if security in kept_positions:
            order_target_percent(security, weight)
        else:
            if ranking_table[security] > minimum_momentum:
                order_target_percent(security, weight)
def analyze(context, perf):
    perf['max'] = perf.portfolio_value.cummax()
    perf['dd'] = (perf.portfolio_value / perf['max']) - 1
    maxdd = perf['dd'].min()
    ann_ret = (np.power((perf.portfolio_value.iloc[-1] / perf.portfolio_value.iloc[0]), (252 / len(perf)))) - 1
    print("Annualized Return: {:.2%} Max Drawdown: {:.2%}".format(ann_ret, maxdd))
    return
start_date = Timestamp('2015-01-01', tz='UTC')
end_date = Timestamp('2020-03-14', tz='UTC')

perf = zipline.run_algorithm(
    start=start_date, end=end_date,
    initialize=initialize,
    analyze=analyze,
    capital_base=intial_portfolio,
    data_frequency='daily',
    bundle='norgatedata-sp500')
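Not a definitive answer, but as a sketch of the underlying issue: return buy_list can only appear inside a function (here it sits inside rebalance and makes it exit before any orders are placed, which is consistent with the backtest suddenly showing 0% months), and df is never defined anywhere in the snippets. One hedged way to hand buy_list to a DataFrame without a bare return is to stash it on zipline's persistent context object and read it back later; the stand-in class below only mimics context so the sketch runs on its own, and the ranking values are made up:
import pandas as pd

class FakeContext:
    """Stand-in for zipline's context object, just for this illustration."""
    pass

def pick_buys(context, ranking_table, kept_positions, portfolio_size):
    # same selection logic as in rebalance(), reduced to the relevant lines
    replacement_stocks = portfolio_size - len(kept_positions)
    buy_list = ranking_table.loc[
        ~ranking_table.index.isin(kept_positions)][:replacement_stocks]
    context.buy_list = buy_list   # keep it around instead of returning it early
    return buy_list

context = FakeContext()
ranking_table = pd.Series({'AAPL': 55.0, 'MSFT': 48.0, 'XOM': 12.0})  # made-up ranks
pick_buys(context, ranking_table, kept_positions=['MSFT'], portfolio_size=2)

df1 = context.buy_list.to_frame(name='momentum')  # DataFrame built from buy_list
print(df1)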

Pandas Timeseries: Total duration meeting a specific condition

I have a timeseries
ts = pd.Series(data=[0,1,2,3,4],index=[pd.Timestamp('1991-01-01'),pd.Timestamp('1995-01-01'),pd.Timestamp('1996-01-01'),pd.Timestamp('2010-01-01'),pd.Timestamp('2011-01-01')])
What's the fastest, most readable way to get the total duration for which the value is below 2, assuming each value is valid until the next time step indicates otherwise (no linear interpolation)? I imagine there is probably a pandas function for this.
This seems to be working quite well, however I am still baffled that there does not seem to be a pandas function for this!
import pandas as pd
import numpy as np
ts = pd.Series(data=[0,1,2,3,4],index=[pd.Timestamp('1991-01-01'),pd.Timestamp('1995-01-01'),pd.Timestamp('1996-01-01'),pd.Timestamp('2010-01-01'),pd.Timestamp('2011-01-01')])
# making the timeseries binary. 1 = meets condition, 0 = does not
ts = ts.where(ts>=2,other=1)
ts = ts.where(ts<2,other=0)
delta_time = ts.index.to_pydatetime()[1:]-ts.index.to_pydatetime()[:-1]
time_below_2 = np.sum(delta_time[np.invert(ts.values[:-1])]).total_seconds()
time_above_2 = np.sum(delta_time[(ts.values[:-1])]).total_seconds()
The above function seems to break for certain timeframes. This option is slower, but did not break in any of my tests:
def get_total_duration_above_and_below_value(value, ts):
    # making the timeseries binary. 1 = above value, 0 = below value
    ts = ts.where(ts >= value, other=0)
    ts = ts.where(ts < value, other=1)
    time_above_value = 0
    time_below_value = 0
    for i in range(ts.size - 1):
        if ts[i] == 1:
            time_above_value += abs(pd.Timedelta(
                ts.index[i] - ts.index[i + 1]).total_seconds()) / 3600
        else:
            time_below_value += abs(pd.Timedelta(
                ts.index[i] - ts.index[i + 1]).total_seconds()) / 3600
    return time_above_value, time_below_value
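For comparison, here is a more vectorized sketch of the same step-wise logic; it uses no special pandas function, just diff/shift on the index, and the variable names are mine:
import pandas as pd

ts = pd.Series(data=[0, 1, 2, 3, 4],
               index=[pd.Timestamp('1991-01-01'), pd.Timestamp('1995-01-01'),
                      pd.Timestamp('1996-01-01'), pd.Timestamp('2010-01-01'),
                      pd.Timestamp('2011-01-01')])

# how long each value is "held" until the next sample (NaT for the last one)
holding = ts.index.to_series().diff().shift(-1)

time_below = holding[ts < 2].sum().total_seconds()    # values 0 and 1, held 1991-1996
time_above = holding[ts >= 2].sum().total_seconds()   # values 2 and 3, held 1996-2011
print(time_below / 86400, time_above / 86400)         # durations in days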