import pandas as pd
import xlrd
import os
import matplotlib.pylab as plt
file_folder_address = 'C:/Users/Amirreza/Desktop/python homeworks/project files'
df_total=pd.DataFrame()
for file in os.listdir(file_folder_address): #os.listdir gives a list of exel file names
df_men_urb = pd.DataFrame()
df_women_urb = pd.DataFrame()
df_men_rural = pd.DataFrame()
df_women_rural = pd.DataFrame()
sheet_names = pd.ExcelFile(os.path.join(file_folder_address, file), engine="xlrd").sheet_names
`
when I use this cod make above error . what should I do ?
Bonjour,
"sparkline" does not work in my code.
Already, I didn't manage to install it. So, I found a function that I call "sparkline_test. Nevertheless, the images that should be integrated in the table are outside. Something is wrong.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from io import BytesIO
from itertools import islice
import seaborn as sns
import base64
I cannot import sparklines:
#import sparklines
df = sns.load_dataset('titanic')
def percentile_90(x):
return x.quantile(.9)
from scipy.stats import trim_mean
def trim_mean_10(x):
return trim_mean(x, 0.1)
def largest(x):
return x.nlargest(1)
def sparkline_str(x):
bins=np.histogram(x)[0]
sl = ''.join(sparklines(bins))
return sl
def sparkline_test(data, figsize=(4,0.25),**kwags):
data = list(data)
fig,ax = plt.subplots(1,1,figsize=figsize,**kwags)
ax.plot(data)
for k,v in ax.spines.items():
v.set_visible(False)
ax.set_xticks([])
ax.set_yticks([])
plt.plot(len(data)-1, data[len(data)-1], 'r.')
ax.fill_between(range(len(data)), data, len(data)*[min(data)], alpha=0.1)
img = BytesIO()
plt.savefig(img, transparent=True, bbox_inches='tight')
img.seek(0)
plt.show()
# plt.close()
return base64.b64encode(img.read()).decode("utf-8")
def sparkline_str(x):
bins=np.histogram(x)[0]
sl = ''.join(sparkline_test(bins))
return sl
agg_func_largest = {
'fare': [percentile_90, trim_mean_10, largest, sparkline_test]
#'fare': [percentile_90, trim_mean_10, largest]
}
df.groupby(['class', 'embark_town']).agg(agg_func_largest)
that produces:
What is expected is:
Something is wrong....But what?
Do you have any idea?
Regards,
Atapalou
my code for df.interpolate was:
import pandas as pd
import numpy as np
import xlrd
from IPython.display import display
from scipy import interpolate
pd.set_option('display.max_rows',54100)
df = pd.read_excel(r'C:\Users\User\Desktop\tanvir random practice\gazipur.xlsx', parse_date=["DateTime"], index_col='DateTime']
df.interpolate(method="linear").bfill()
display(df)
I am trying to run a Zipline back test by calling the run() method of zipline.algorithm.TradeAlgorithm:
algo = TradingAlgorithm(initialize= CandlestickStrategy.initialize,
handle_data= CandlestickStrategy.handle_data,
analyze= CandlestickStrategy.analyze,
data=None,
bundle='quandl')
results = algo.run()
But I'm not sure what or how to pass the data parameter. I have already ingested the data bundle which is called 'quandl'. According to the docs, that parameter should receive a DataPortal instance, but I don't know how to create one of those based on the data I have ingested. What is the best way of doing this/is this necessary?
Essentially my goal is to create a top level 'dashboard' style class which can run multiple back tests using different strategies which exist in separate modules.
Full code (dashboard.py):
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from mpl_finance import candlestick_ohlc
from datetime import datetime, date, tzinfo, timedelta
from dateutil import parser
import pytz
import numpy as np
import talib
import warnings
import logbook
from logbook import Logger
log = Logger('Algorithm')
from zipline.algorithm import TradingAlgorithm
from zipline.api import order_target_percent, order_target, cancel_order, get_open_orders, get_order, get_datetime, record, symbol
from zipline.data import bundles
from zipline.finance import execution
from CandlestickStrategy import CandlestickStrategy
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
# Choosing a security and a time horizon
logbook.StderrHandler().push_application()
start = datetime(2014, 9, 1, 0, 0, 0, 0, pytz.utc)
end = datetime(2016, 1, 1, 0, 0, 0, 0, pytz.utc)
#dataPortal = data_portal.DataPortal(asset_finder, trading_calendar, first_trading_day, e
#bundle = bundles.load('quandl',None,start)
algo = TradingAlgorithm(initialize= CandlestickStrategy.initialize,
handle_data= CandlestickStrategy.handle_data,
analyze= CandlestickStrategy.analyze,
data=None,
bundle='quandl')
results = algo.run()
CandleStickStrategy.py:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from mpl_finance import candlestick_ohlc
from zipline.api import order_target_percent, order_target, cancel_order, get_open_orders, get_order, get_datetime, record, symbol
from zipline.finance import execution
from datetime import datetime, date, tzinfo, timedelta
from dateutil import parser
import pytz
import numpy as np
import talib
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
class CandlestickStrategy:
def initialize(context):
print "initializing algorythm..."
context.i = 0
context.asset = symbol('AAL')
def handle_data(context, data):
try:
trailing_window = data.history(context.asset, ['open','high','low','close'], 28, '1d')
except:
return
def analyze(context=None, results=None):
print "Analyze"
Hopefully someone can point me in the right direction.
Thanks
I faced the same issue. When running the trading algorithm manually this way the bundle argument is not evaluated. You need to create the data portal yourself. I manually registered the bundle and created a data_portal to run it:
bundles.register('yahoo-xetra',
csvdir_equities(get_calendar("XETRA"), ["daily"],
'/data/yahoo'),
calendar_name='XETRA')
bundle_data = bundles.load(
'yahoo-xetra',
)
first_trading_day = bundle_data.equity_daily_bar_reader.first_trading_day
data = DataPortal(
bundle_data.asset_finder,
trading_calendar=get_calendar("XETRA"),
first_trading_day=first_trading_day,
equity_minute_reader=bundle_data.equity_minute_bar_reader,
equity_daily_reader=bundle_data.equity_daily_bar_reader,
adjustment_reader=bundle_data.adjustment_reader,
)
Strategy = SimpleAlgorithm(trading_calendar=get_calendar("XETRA"), data_frequency='daily',
start=pd.Timestamp('2017-1-1 08:00:00+0200', tz='Europe/Berlin'),
end=pd.Timestamp('2018-12-27 08:00:00+0200', tz='Europe/Berlin'),
capital_base=10000000,
data_portal=data)
from abupy import ABuSymbolPd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
tsla_df = ABuSymbolPd.make_kl_df('usTSLA', n_folds=8)
tsla_df [['close', 'volume']].plot (subplots = True, style = ['r', 'g'],
grid = True)
print tsla_df [ ['close', 'volume']]
plt.show()
tsla_df.info()
tsla_df.describe(include = "all")
In above python code, I hope last code list the statistical of tsla_df, but it does not and also never give any error information. Anybody has any idea?