Zipline - How to pass bundle DataPortal to TradeAlgorithm.run()? - zipline

I am trying to run a Zipline backtest by calling the run() method of zipline.algorithm.TradingAlgorithm:
# Build the algorithm from the strategy's three callbacks.
# Note that data is passed as None here, alongside bundle='quandl'.
algo = TradingAlgorithm(initialize= CandlestickStrategy.initialize,
handle_data= CandlestickStrategy.handle_data,
analyze= CandlestickStrategy.analyze,
data=None,
bundle='quandl')
# Run the algorithm and keep whatever run() returns.
results = algo.run()
But I'm not sure what or how to pass the data parameter. I have already ingested the data bundle which is called 'quandl'. According to the docs, that parameter should receive a DataPortal instance, but I don't know how to create one of those based on the data I have ingested. What is the best way of doing this/is this necessary?
Essentially my goal is to create a top level 'dashboard' style class which can run multiple back tests using different strategies which exist in separate modules.
Full code (dashboard.py):
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from mpl_finance import candlestick_ohlc
from datetime import datetime, date, tzinfo, timedelta
from dateutil import parser
import pytz
import numpy as np
import talib
import warnings
import logbook
from logbook import Logger
log = Logger('Algorithm')
from zipline.algorithm import TradingAlgorithm
from zipline.api import order_target_percent, order_target, cancel_order, get_open_orders, get_order, get_datetime, record, symbol
from zipline.data import bundles
from zipline.finance import execution
from CandlestickStrategy import CandlestickStrategy
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
# Choosing a security and a time horizon
# Push a logbook handler that writes to stderr for the whole application.
logbook.StderrHandler().push_application()
# Timezone-aware (UTC) bounds of the intended test window.
# NOTE(review): start/end are not passed to TradingAlgorithm below — confirm
# how the window is meant to reach the algorithm.
start = datetime(2014, 9, 1, 0, 0, 0, 0, pytz.utc)
end = datetime(2016, 1, 1, 0, 0, 0, 0, pytz.utc)
# Disabled attempts at building the data source by hand (the first line is
# cut off in this listing).
#dataPortal = data_portal.DataPortal(asset_finder, trading_calendar, first_trading_day, e
#bundle = bundles.load('quandl',None,start)
# Build the algorithm from the strategy's three callbacks; data is None here.
algo = TradingAlgorithm(initialize= CandlestickStrategy.initialize,
handle_data= CandlestickStrategy.handle_data,
analyze= CandlestickStrategy.analyze,
data=None,
bundle='quandl')
# Run the algorithm and keep whatever run() returns.
results = algo.run()
CandleStickStrategy.py:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from mpl_finance import candlestick_ohlc
from zipline.api import order_target_percent, order_target, cancel_order, get_open_orders, get_order, get_datetime, record, symbol
from zipline.finance import execution
from datetime import datetime, date, tzinfo, timedelta
from dateutil import parser
import pytz
import numpy as np
import talib
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
class CandlestickStrategy:
    """Namespace grouping the callbacks of the candlestick strategy.

    The callbacks are looked up on the class itself (for example
    ``CandlestickStrategy.initialize``) and take no ``self``, so they are
    declared as static methods. That keeps them callable both through the
    class and through an instance, on Python 2 as well as Python 3.
    """

    @staticmethod
    def initialize(context):
        """Set up the per-run state on *context*.

        Stores a counter ``i`` starting at 0 and the asset looked up for the
        ticker 'AAL' (via ``symbol``, which must be in scope at module level).
        """
        print("initializing algorythm...")
        context.i = 0
        context.asset = symbol('AAL')

    @staticmethod
    def handle_data(context, data):
        """Request a 28-bar daily OHLC window for ``context.asset``.

        Returns None in every case. If the history request raises an
        ordinary exception the bar is skipped. KeyboardInterrupt and
        SystemExit are deliberately not swallowed.
        """
        try:
            # The window is fetched but not used yet.
            trailing_window = data.history(
                context.asset, ['open', 'high', 'low', 'close'], 28, '1d')
        except Exception:
            return

    @staticmethod
    def analyze(context=None, results=None):
        """Post-run hook; currently only prints a marker line."""
        print("Analyze")
Hopefully someone can point me in the right direction.
Thanks

I faced the same issue. When running the trading algorithm manually this way the bundle argument is not evaluated. You need to create the data portal yourself. I manually registered the bundle and created a data_portal to run it:
# Register a bundle named 'yahoo-xetra' whose ingest function is built by
# csvdir_equities from the XETRA calendar, the "daily" frequency and the
# '/data/yahoo' directory.
bundles.register('yahoo-xetra',
csvdir_equities(get_calendar("XETRA"), ["daily"],
'/data/yahoo'),
calendar_name='XETRA')
# Load the registered bundle by name.
bundle_data = bundles.load(
'yahoo-xetra',
)
# The first trading day is read from the bundle's daily bar reader.
first_trading_day = bundle_data.equity_daily_bar_reader.first_trading_day
# Assemble the DataPortal from the bundle's asset finder and its minute,
# daily and adjustment readers, using the same XETRA calendar.
data = DataPortal(
bundle_data.asset_finder,
trading_calendar=get_calendar("XETRA"),
first_trading_day=first_trading_day,
equity_minute_reader=bundle_data.equity_minute_bar_reader,
equity_daily_reader=bundle_data.equity_daily_bar_reader,
adjustment_reader=bundle_data.adjustment_reader,
)
# Instantiate the algorithm with the portal passed as data_portal.
# NOTE(review): SimpleAlgorithm is not defined in this listing — presumably a
# TradingAlgorithm subclass; confirm.
Strategy = SimpleAlgorithm(trading_calendar=get_calendar("XETRA"), data_frequency='daily',
start=pd.Timestamp('2017-1-1 08:00:00+0200', tz='Europe/Berlin'),
end=pd.Timestamp('2018-12-27 08:00:00+0200', tz='Europe/Berlin'),
capital_base=10000000,
data_portal=data)

Related

OCR in the background while displaying findings in GUI

I am trying to run an OCR function in the background while displaying the findings in a GUI.
The OCR is working fine, but I can't seem to get the GUI started.
I think the issue is that there is no function to start the GUI, but I have not found a solution.
import time
import cv2
import mss
import numpy
import pytesseract
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
import PySimpleGUI as sg
import os
import threading
from concurrent.futures import ThreadPoolExecutor
from PIL import Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
# Grab the screen with mss, run pytesseract on the capture, print the
# recognised text and return it.
# NOTE(review): indentation was lost in this listing, so it is unclear whether
# `return text` sits inside the `while True` loop — confirm the intended nesting.
def ocr_function():
with mss.mss() as mss_instance:
# monitors[0] is used as the capture region for every grab below.
mon = mss_instance.monitors[0]
# This first capture is not referenced again in the listing.
screenshot = mss_instance.grab(mon) #Read all monitor(s)
with mss.mss() as sct:
while True:
# Convert the capture to a numpy array for OpenCV and pytesseract.
im = numpy.asarray(sct.grab(mon))
plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
text = pytesseract.image_to_string(im) #image to text
time.sleep(5) #One screenshot per 5 seconds
os.system('cls') #Clear output
print(text) #Print the output text
return text #Return text to function
#plt.show()
# Binds the function object itself; ocr_function is not called on this line.
Output = ocr_function
t = threading.Thread(target=ocr_function) #Create a thread for the OCR function
t.start() #Start the OCR thread
# NOTE(review): df is used here, but its only assignment in this listing comes
# further down — confirm where df is meant to come from.
Output = df.sort_values(by=['Match_Acc.','D-level', 'R-level'], ascending=[False, False, False]) # Sort columes
# Keep only the rows whose 'Match_Acc.' value is at least 1.
Output = Output[Output['Match_Acc.'] >= 1]
# font is assigned but not referenced again in this listing.
font = ('Areal', 11)
sg.theme('BrownBlue')
data = Output
# This headings list is overwritten by df.columns.tolist() two lines below.
headings = ['Result', 'Column1','Column2','Column3','D-level','R-level','n_matches','nan','nonnan','Match_Acc.']
df = pd.DataFrame(data)
headings = df.columns.tolist()
data = df.values.tolist()
# One table row plus a row with the 'Run' and 'Exit' buttons.
layout = [[sg.Table(data, headings=headings, justification='left', key='-TABLE-')],
[sg.Button('Run'), sg.Button('Exit')]]
# A single read() with close=True; there is no event loop in this listing.
sg.Window("Overview", layout).read(close=True)

NameError: name 'sparklines' is not defined

Bonjour,
import the data frame:
# Loading a Sample Pandas DataFrame
import pandas as pd
import numpy as np
df = pd.read_csv('https://raw.githubusercontent.com/datagy/data/main/sales.csv', parse_dates=['date'])
code is:
def percentile_90(x):
    """Return the 0.9 quantile (90th percentile) of *x*.

    *x* must expose a pandas-style ``quantile`` method, e.g. a Series or
    the per-group Series handed over by ``groupby(...).agg``.
    """
    return x.quantile(.9)
from scipy.stats import trim_mean
def trim_mean_10(x):
    """Return the mean of *x* after cutting 10% of the values off each tail.

    Delegates to ``scipy.stats.trim_mean`` with a cut proportion of 0.1.
    """
    return trim_mean(x, 0.1)
def largest(x):
    """Return the single largest entry of *x* via ``x.nlargest(1)``.

    The result keeps the container type produced by ``nlargest`` (for a
    pandas Series: a one-element Series, or an empty one for empty input).
    """
    return x.nlargest(1)
import matplotlib.pyplot as plt
import base64
from r-ltxsparklines import sparklines
def sparkline_str(x):
    """Return a one-line sparkline string for the histogram of *x*.

    The values are bucketed with ``np.histogram`` (default binning) and the
    per-bin counts are handed to ``sparklines``; the pieces it yields are
    joined into a single string.

    NOTE(review): ``sparklines`` must be importable at module level; the
    import shown next to this function is not valid Python, so confirm
    which package is meant to provide it.
    """
    # Element 0 of np.histogram's result holds the counts; the edges are unused.
    bins = np.histogram(x)[0]
    sl = ''.join(sparklines(bins))
    return sl
# Here they all are, gathered together:
# Map the 'fare' column to the four custom aggregation functions.
agg_func_largest = {
'fare': [percentile_90, trim_mean_10, largest, sparkline_str]
}
# NOTE(review): assumes df has 'fare', 'class' and 'embark_town' columns —
# confirm against the CSV that was loaded.
df.groupby(['class', 'embark_town']).agg(agg_func_largest)
that produces:
Input In [82]
from r-ltxsparklines import sparklines
^
SyntaxError: invalid syntax
After other modifications, error is:
NameError: name 'sparklines' is not defined
The question is: how to define 'sparklines' or which libraries to import so that the 'sparklines' function is recognized?
Regards,
Atapalou

Why do histogram ticks show different results in the GUI compared with the non-GUI version?

I am using Jupyter Notebook and I want to draw histograms. When I do not use a GUI, everything is shown correctly, but when I use the Tkinter version of the code, all bars in the histogram are shifted to the left, so the first bar is missing (e.g. it should be 4 on a, 3 on b and 9 on c, but it shows 3 on a and 9 on b, where a, b and c are ticks).
This is the first code, in which I do not use the GUI:
import Tkinter as tk
from Tkinter import*
import tkMessageBox
import tkFileDialog
import pandas as pd
import pyautogui
import os
from PIL import Image, ImageTk
from tkinter import ttk
import pylab as plt
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
AutoMinorLocator)
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
# Non-GUI version: overlay the per-month histograms of two years with pylab.
# These empty frames are replaced by the df.loc selections below.
select1=pd.DataFrame()
select2=pd.DataFrame()
year1=1396
year2=1395
# NOTE(review): df is not defined in this listing — presumably loaded earlier
# in the notebook; confirm.
# Keep the rows whose 'yearj' equals each year.
select1=df.loc[(df['yearj']==year1)]
select2=df.loc[(df['yearj']==year2)]
# 'monthj' values of each selection as plain lists.
x=select1['monthj'].values.tolist()
y=select2['monthj'].values.tolist()
plt.xlabel('month')
plt.ylabel('number of orders')
# Thirteen edges, 1 through 13, i.e. twelve bins.
bins=[1,2,3,4,5,6,7,8,9,10,11,12,13]
axx=plt.subplot()
# Major ticks at every multiple of 1, formatted as integers.
axx.xaxis.set_major_locator(MultipleLocator(1))
axx.xaxis.set_major_formatter(FormatStrFormatter('%d'))
# year2 is drawn first; year1 is drawn on top with alpha=0.6.
plt.hist(y,bins,rwidth=0.8)
plt.hist(x,bins,rwidth=0.8, alpha=0.6)
and the output is:
enter image description here
and here is second code:
import pandas as pd
import numpy
import pylab as plt
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
AutoMinorLocator)
# Tkinter version: draw the two per-month histograms on a Figure embedded in
# a Toplevel window.
# NOTE(review): indentation was lost in this listing, and tk, Figure and
# FigureCanvasTkAgg are not imported in the import lines shown with it.
def compare_months(df,hh):
# NOTE(review): this line is cut off in the listing (unclosed parenthesis).
compmon = tk.Toplevel(h
# Thirteen edges, 1 through 13, i.e. twelve bins.
bins=[1,2,3,4,5,6,7,8,9,10,11,12,13]
# These empty frames are replaced by the df.loc selections below.
select1=pd.DataFrame()
select2=pd.DataFrame()
year1=1396
year2=1395
# Keep the rows whose 'yearj' equals each year.
select1=df.loc[(df['yearj']==year1)]
select2=df.loc[(df['yearj']==year2)]
xr=select1['monthj'].values.tolist()
yr=select2['monthj'].values.tolist()
# Sort both month lists in place, comparing the values as int.
xr.sort(key=int)
yr.sort(key=int)
f = Figure(figsize=(7,6), dpi=80)
# Axes are added explicitly with a [left, bottom, width, height] rectangle.
f.add_axes([0.15, 0.15,0.8,0.7])
canvas = FigureCanvasTkAgg(f, master=compmon)
canvas.get_tk_widget().grid(row=4, column=5, rowspan=8)
p = f.gca()
p.set_xlabel('month', fontsize = 10)
p.set_ylabel('number of orders', fontsize = 10)
# year2 is drawn first; year1 is drawn on top with alpha=0.6.
p.hist(yr,bins,rwidth=0.8)
p.hist(xr,bins,rwidth=0.8, alpha=0.6)
# Unlike the non-GUI listing, the formatter and locator are set after the
# hist calls here.
p.xaxis.set_major_formatter(FormatStrFormatter('%d'))
p.xaxis.set_major_locator(MultipleLocator(1))
# NOTE(review): compare_months(df,root) is called right here while the Button
# is being constructed, and its return value is what gets passed as command;
# confirm whether a callable (e.g. a lambda) was intended instead.
but=Button(compmon, text="ok", command=compare_months(df,root))
but.grid(row=2,column=2)
and the output is:enter image description here
Why does this happen?

ImportError: No module named 'svmutil'

I am stuck right now. My code:
import sys
import os
import itertools
import random
from PIL import Image
from svmutil import *
# Module-level configuration; the code that uses these values is elided in
# this listing.
DIMENSION = 200
# Relative path of the training data directory.
ROOT_DIR = "../train/"
# The two class labels and the list that holds them in order.
NEGATIVE = "negative"
POSITIVE = "positive"
CLASSES = [NEGATIVE, POSITIVE]
....
and it says :
ImportError: No module named 'svmutil'
I am currently using Python 3.5.
What should I do now?

seaborn/Matplotlib export EPS causes "falling back to Agg renderer"

Consider the MWE below. This will cause (Seaborn 0.7.1, Matplotlib 1.5.1):
/usr/local/lib/python3.5/dist-packages/matplotlib/tight_layout.py:222:
UserWarning: tight_layout : falling back to Agg renderer
warnings.warn("tight_layout : falling back to Agg renderer")
How to fix this?
MWE
import matplotlib
matplotlib.use('ps')
import pandas as pd
import random
import seaborn as sns
import matplotlib.pyplot as plt
# Build a 25-row frame with two columns of distinct random integers, each
# sampled from range(1, 100).
ds = pd.DataFrame()
ds['x'] = random.sample(range(1, 100), 25)
ds['y'] = random.sample(range(1, 100), 25)
# Joint plot of x against y with stat_func=None.
p = sns.jointplot(x = ds['x'],y = ds['y'],linewidth=1,edgecolor='black',alpha=0.3,stat_func=None)
# Save the current figure as EPS (the listing selects the 'ps' backend
# before importing pyplot).
plt.savefig("test.eps")