Python ML LSTM Stock Prediction with Dash: No Output, Code Just Keeps Running (tensorflow)

I'm trying to run the following code in a Jupyter notebook, but it just keeps running endlessly with no output. I'm following the tutorial from: https://data-flair.training/blogs/stock-price-prediction-machine-learning-project-in-python/
The code is from stock_app.py, which doesn't seem to be working:
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import plotly.graph_objs as go
from dash.dependencies import Input, Output
from keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import numpy as np
app = dash.Dash()
server = app.server
scaler=MinMaxScaler(feature_range=(0,1))
df_nse = pd.read_csv("./NSE-TATA.csv")
df_nse["Date"]=pd.to_datetime(df_nse.Date,format="%Y-%m-%d")
df_nse.index=df_nse['Date']
data=df_nse.sort_index(ascending=True,axis=0)
new_data=pd.DataFrame(index=range(0,len(df_nse)),columns=['Date','Close'])
for i in range(0,len(data)):
    new_data["Date"][i]=data['Date'][i]
    new_data["Close"][i]=data["Close"][i]
new_data.index=new_data.Date
new_data.drop("Date",axis=1,inplace=True)
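# Editor's aside (not from the tutorial): the row-by-row loop above is
# equivalent to the vectorized form  new_data = data[['Close']].set_index(data['Date'])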
dataset=new_data.values
train=dataset[0:987,:]
valid=dataset[987:,:]
scaler=MinMaxScaler(feature_range=(0,1))
scaled_data=scaler.fit_transform(dataset)
x_train,y_train=[],[]
for i in range(60,len(train)):
    x_train.append(scaled_data[i-60:i,0])
    y_train.append(scaled_data[i,0])
x_train,y_train=np.array(x_train),np.array(y_train)
x_train=np.reshape(x_train,(x_train.shape[0],x_train.shape[1],1))
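# x_train is now shaped (samples, 60, 1): the (samples, timesteps, features) 3-D input expected by Keras LSTM layers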
model=load_model("saved_ltsm_model.h5")
inputs=new_data[len(new_data)-len(valid)-60:].values
inputs=inputs.reshape(-1,1)
inputs=scaler.transform(inputs)
X_test=[]
for i in range(60,inputs.shape[0]):
    X_test.append(inputs[i-60:i,0])
X_test=np.array(X_test)
X_test=np.reshape(X_test,(X_test.shape[0],X_test.shape[1],1))
closing_price=model.predict(X_test)
closing_price=scaler.inverse_transform(closing_price)
train=new_data[:987]
valid=new_data[987:].copy()  # .copy() so adding the Predictions column below doesn't raise SettingWithCopyWarning
valid['Predictions']=closing_price
df= pd.read_csv("./stock_data.csv")
app.layout = html.Div([
html.H1("Stock Price Analysis Dashboard", style={"textAlign": "center"}),
dcc.Tabs(id="tabs", children=[
dcc.Tab(label='NSE-TATAGLOBAL Stock Data',children=[
html.Div([
html.H2("Actual closing price",style={"textAlign": "center"}),
dcc.Graph(
id="Actual Data",
figure={
"data":[
go.Scatter(
x=valid.index,  # was train.index; x must come from the same frame as y (valid["Close"])
y=valid["Close"],
mode='markers'
)
],
"layout":go.Layout(
title='scatter plot',
xaxis={'title':'Date'},
yaxis={'title':'Closing Rate'}
)
}
),
html.H2("LSTM Predicted closing price",style={"textAlign": "center"}),
dcc.Graph(
id="Predicted Data",
figure={
"data":[
go.Scatter(
x=valid.index,
y=valid["Predictions"],
mode='markers'
)
],
"layout":go.Layout(
title='scatter plot',
xaxis={'title':'Date'},
yaxis={'title':'Closing Rate'}
)
}
)
])
]),
dcc.Tab(label='Facebook Stock Data', children=[
html.Div([
html.H1("Facebook Stocks High vs Lows",
style={'textAlign': 'center'}),
dcc.Dropdown(id='my-dropdown',
options=[{'label': 'Tesla', 'value': 'TSLA'},
{'label': 'Apple','value': 'AAPL'},
{'label': 'Facebook', 'value': 'FB'},
{'label': 'Microsoft','value': 'MSFT'}],
multi=True,value=['FB'],
style={"display": "block", "margin-left": "auto",
"margin-right": "auto", "width": "60%"}),
dcc.Graph(id='highlow'),
html.H1("Facebook Market Volume", style={'textAlign': 'center'}),
dcc.Dropdown(id='my-dropdown2',
options=[{'label': 'Tesla', 'value': 'TSLA'},
{'label': 'Apple','value': 'AAPL'},
{'label': 'Facebook', 'value': 'FB'},
{'label': 'Microsoft','value': 'MSFT'}],
multi=True,value=['FB'],
style={"display": "block", "margin-left": "auto",
"margin-right": "auto", "width": "60%"}),
dcc.Graph(id='volume')
], className="container"),
])
])
])
# NOTE: in the original paste this decorator appeared as "#app.callback(...)",
# i.e. commented out. It must be @app.callback for the callback to register.
@app.callback(Output('highlow', 'figure'),
              [Input('my-dropdown', 'value')])
def update_graph(selected_dropdown):
    dropdown = {"TSLA": "Tesla", "AAPL": "Apple", "FB": "Facebook", "MSFT": "Microsoft"}
    trace1 = []
    trace2 = []
    for stock in selected_dropdown:
        trace1.append(
            go.Scatter(x=df[df["Stock"] == stock]["Date"],
                       y=df[df["Stock"] == stock]["High"],
                       mode='lines', opacity=0.7,
                       name=f'High {dropdown[stock]}', textposition='bottom center'))
        trace2.append(
            go.Scatter(x=df[df["Stock"] == stock]["Date"],
                       y=df[df["Stock"] == stock]["Low"],
                       mode='lines', opacity=0.6,
                       name=f'Low {dropdown[stock]}', textposition='bottom center'))
    traces = [trace1, trace2]
    data = [val for sublist in traces for val in sublist]
    figure = {'data': data,
              'layout': go.Layout(colorway=["#5E0DAC", '#FF4F00', '#375CB1',
                                            '#FF7400', '#FFF400', '#FF0056'],
                                  height=600,
                                  title=f"High and Low Prices for {', '.join(str(dropdown[i]) for i in selected_dropdown)} Over Time",
                                  xaxis={"title": "Date",
                                         'rangeselector': {'buttons': list([{'count': 1, 'label': '1M',
                                                                             'step': 'month',
                                                                             'stepmode': 'backward'},
                                                                            {'count': 6, 'label': '6M',
                                                                             'step': 'month',
                                                                             'stepmode': 'backward'},
                                                                            {'step': 'all'}])},
                                         'rangeslider': {'visible': True}, 'type': 'date'},
                                  yaxis={"title": "Price (USD)"})}
    return figure
@app.callback(Output('volume', 'figure'),
              [Input('my-dropdown2', 'value')])
def update_volume_graph(selected_dropdown_value):  # renamed from update_graph so the two callbacks don't share a name
    dropdown = {"TSLA": "Tesla", "AAPL": "Apple", "FB": "Facebook", "MSFT": "Microsoft"}
    trace1 = []
    for stock in selected_dropdown_value:
        trace1.append(
            go.Scatter(x=df[df["Stock"] == stock]["Date"],
                       y=df[df["Stock"] == stock]["Volume"],
                       mode='lines', opacity=0.7,
                       name=f'Volume {dropdown[stock]}', textposition='bottom center'))
    traces = [trace1]
    data = [val for sublist in traces for val in sublist]
    figure = {'data': data,
              'layout': go.Layout(colorway=["#5E0DAC", '#FF4F00', '#375CB1',
                                            '#FF7400', '#FFF400', '#FF0056'],
                                  height=600,
                                  title=f"Market Volume for {', '.join(str(dropdown[i]) for i in selected_dropdown_value)} Over Time",
                                  xaxis={"title": "Date",
                                         'rangeselector': {'buttons': list([{'count': 1, 'label': '1M',
                                                                             'step': 'month',
                                                                             'stepmode': 'backward'},
                                                                            {'count': 6, 'label': '6M',
                                                                             'step': 'month',
                                                                             'stepmode': 'backward'},
                                                                            {'step': 'all'}])},
                                         'rangeslider': {'visible': True}, 'type': 'date'},
                                  yaxis={"title": "Transactions Volume"})}
    return figure
if __name__=='__main__':
    app.run_server(debug=True)
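Two editorial notes on why this cell "keeps running" with no output. First, app.run_server() starts a blocking Flask development server: the cell is expected to keep running, and the dashboard is served at http://127.0.0.1:8050 rather than shown as cell output. Second, debug=True enables a reloader that can misbehave inside a notebook kernel. A minimal sketch for rendering inside Jupyter instead, assuming the separate jupyter-dash package is installed (JupyterDash and mode='inline' belong to that package, not to the code above):
# Hedged sketch: render a Dash app inline in a Jupyter cell using jupyter-dash
# (pip install jupyter-dash). Assumes layout/callbacks are built as above.
from jupyter_dash import JupyterDash
import dash_html_components as html
app = JupyterDash(__name__)
app.layout = html.Div([html.H1("Hello from Jupyter")])
# mode='inline' embeds the dashboard in the notebook output instead of
# blocking the kernel the way app.run_server(debug=True) does in a script.
app.run_server(mode="inline")
Alternatively, save stock_app.py as a plain script, run python stock_app.py, and open http://127.0.0.1:8050 in a browser.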

Related

Plotly Animation with slider

I want to add two moving points that represent the locations of two trains according to the day. My day data (shown in the picture) runs from 0 to 7. However, in the resulting animation the slider does not stop on the integer days; it jumps from 1.75 to 2.25, or from 2.75 to 3.25, automatically. Can anyone help me solve this?
[figure: trainpath info]
import plotly.graph_objects as go
import pandas as pd
dataset = pd.read_csv('trainpath.csv')
days = []
for k in range(len(dataset['day'])):
    if dataset['day'][k] not in days:
        days.append(dataset['day'][k])
t1 = [-1, 0, 1, 1, 1, 0, -1, -1, -1]
k1 = [-20, -20, -20, 0, 20, 20, 20, 0, -20]
# make list of trains
trains = []
for train in dataset["train"]:
    if train not in trains:
        trains.append(train)
# make figure
fig_dict = {
"data": [go.Scatter(x=t1, y=k1,
mode="lines",
line=dict(width=2, color="blue")),
go.Scatter(x=t1, y=k1,
mode="lines",
line=dict(width=2, color="blue"))],
"layout": {},
"frames": []
}
# fill in most of layout
fig_dict['layout']['title'] = {'text':'Train Animation'}
fig_dict["layout"]["xaxis"] = {"range": [-10, 10], "title": "xlocation", 'autorange':False, 'zeroline':False}
fig_dict["layout"]["yaxis"] = {"range": [-22, 22], "title": "ylocation", 'autorange':False, 'zeroline':False}
fig_dict["layout"]["hovermode"] = "closest"
fig_dict["layout"]["updatemenus"] = [
{
"buttons": [
{
"args": [None, {"frame": {"duration": 500, "redraw": False},
"fromcurrent": True, "transition": {"duration": 300,
"easing": "quadratic-in-out"}}],
"label": "Play",
"method": "animate"
},
{
"args": [[None], {"frame": {"duration": 0, "redraw": False},
"mode": "immediate",
"transition": {"duration": 0}}],
"label": "Pause",
"method": "animate"
}
],
"direction": "left",
"pad": {"r": 10, "t": 87},
"showactive": False,
"type": "buttons",
"x": 0.1,
"xanchor": "right",
"y": 0,
"yanchor": "top"
}
]
sliders_dict = {
"active": 0,
"yanchor": "top",
"xanchor": "left",
"currentvalue": {
"font": {"size": 20},
"prefix": "Day:",
"visible": True,
"xanchor": "right"
},
"transition": {"duration": 300, "easing": "cubic-in-out"},
"pad": {"b": 10, "t": 50},
"len": 0.9,
"x": 0.1,
"y": 0,
"steps": []
}
# make data
day = 0
for train in trains:
    dataset_by_date = dataset[dataset['day']==day]
    dataset_by_date_and_train = dataset_by_date[dataset_by_date['train']==train]
    data_dict = {
        'x': list(dataset_by_date_and_train['x']),
        'y': list(dataset_by_date_and_train['y']),
        'mode': 'markers',
        'text': train,
        'marker': {
            'sizemode': 'area',
            'sizeref': 20,
            'size': 20,
            # 'size': list(dataset_by_date_and_train['quantity'])  # can be used to scale marker size by material quantity
        },
        'name': train
    }
    fig_dict['data'].append(data_dict)
# make frames
for day in days:
    frame = {'data': [go.Scatter(x=t1, y=k1,
                                 mode="lines",
                                 line=dict(width=2, color="blue")),
                      go.Scatter(x=t1, y=k1,
                                 mode="lines",
                                 line=dict(width=2, color="blue"))],
             'name': str(day)}
    for train in trains:
        dataset_by_date = dataset[dataset['day'] == day]
        dataset_by_date_and_train = dataset_by_date[dataset_by_date['train'] == train]
        data_dict = {
            'x': list(dataset_by_date_and_train['x']),
            'y': list(dataset_by_date_and_train['y']),
            'mode': 'markers',
            'text': train,
            'marker': {
                'sizemode': 'area',
                'sizeref': 20,
                'size': 20,
                # 'size': list(dataset_by_date_and_train['quantity'])  # can be used to scale marker size by material quantity
            },
            'name': train
        }
        frame['data'].append(data_dict)
    fig_dict['frames'].append(frame)
    slider_step = {'args': [
        [day],
        {'frame': {'duration':300, 'redraw':False},
         'mode': 'immediate',
         'transition': {'duration':3000}}
    ],
        'label': day,
        'method': 'animate'}
    sliders_dict["steps"].append(slider_step)
    if day == 7:
        print('H')
fig_dict["layout"]["sliders"] = [sliders_dict]
fig = go.Figure(fig_dict)
fig.show()
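Editor's note, hedged rather than verified: one plausible cause of the fractional slider positions is that each slider step requests a 3000 ms transition while the frame itself lasts only 300 ms, so the slider is still tweening toward a day when the next frame begins. A second, related mismatch: the frames are named str(day), but the step args pass the raw numeric day. A sketch of slider_step with both adjusted:
# Hedged fix sketch: keep the per-step transition no longer than the frame,
# and refer to frames by their string names so each step snaps to its day.
slider_step = {'args': [
    [str(day)],
    {'frame': {'duration': 300, 'redraw': False},
     'mode': 'immediate',
     'transition': {'duration': 0}}
],
    'label': day,
    'method': 'animate'}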

Data ingestion with dataflow write to bq file error

I'm trying to ingest a CSV file into BigQuery using Apache Beam and Dataflow. Here's my code:
import logging
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
gcs_bucket_name = "gs://bck-fr-fichiers-manuel-dev/de_par_categorie_et_code_rome/"
target_table_annonce = 'fr-parisraw-dev-8ef8:pole_emploi.de_par_categorie_et_code_rome'
table_schema_annonce = {'fields': [
{'name': 'cd_metier_rome', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'lb_metier_rome', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'cd_departement', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'lb_departement', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'nb_demandeur', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'mois', 'type': 'STRING', 'mode': 'NULLABLE'}
]}
# DoFn
class PrepareBqRowDoFn(beam.DoFn):
    def process(self, element, *args, **kwargs):
        logging.basicConfig(level=logging.INFO)
        DOFN_LOGGER = logging.getLogger("PREPAREBQROWDOFN_LOGGER")
        import csv
        from datetime import datetime, timedelta
        import re
        # element = re.sub(r'(?=[^"]+)¤(?=[^"]+)', '', element)
        line = csv.reader(element.splitlines(), quotechar='"',
                          delimiter=';', quoting=csv.QUOTE_ALL, skipinitialspace=True)
        for row in line:
            try:
                bq_row = {"cd_metier_rome": row[0],
                          "lb_metier_rome": row[1],
                          "cd_departement": row[2],
                          "lb_departement": row[3],
                          "nb_demandeur": row[4],
                          "mois": row[5]
                          }
                yield bq_row
            except IndexError:
                DOFN_LOGGER.info("Error Row : " + element)
def run():
    pipeline = beam.Pipeline(options=PipelineOptions())
    file_patterns = ['de_par_*.csv']
    for file_pattern in file_patterns:
        csv_lines = pipeline | 'Read File From GCS {}'.format(file_pattern) >> beam.io.ReadFromText(
            gcs_bucket_name + file_pattern)
        bq_row = csv_lines | 'Create Row {}'.format(file_pattern) >> beam.ParDo(PrepareBqRowDoFn())
        bq_row | 'Write to BQ {}'.format(file_pattern) >> beam.io.Write(beam.io.WriteToBigQuery(
            target_table_annonce,
            schema=table_schema_annonce,
            create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
            write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND))
    pipeline.run()

if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)
    run()
The generated pipeline looks as expected, and in the Dataflow console I can see rows being processed at each step: Step 1 (Read File From GCS de_par_*.csv) and Step 2 (Create Row de_par_*.csv) both show elements flowing through, but at the final step 3 (Write to BQ de_par_*.csv) I get 0 lines.
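An editorial aside, offered as an assumption rather than a confirmed diagnosis: in current Apache Beam releases, WriteToBigQuery is itself a PTransform and is normally applied directly rather than wrapped in beam.io.Write, so that wrapping is worth checking, along with the job's BigQuery insert errors. A minimal sketch of the direct form:
# Hedged sketch: apply WriteToBigQuery directly as a PTransform.
bq_row | 'Write to BQ {}'.format(file_pattern) >> beam.io.WriteToBigQuery(
    target_table_annonce,
    schema=table_schema_annonce,
    create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
    write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)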

How to use Format for Julia DashTables

I am trying to format numbers in a Julia Dash DataTable so that they are comma separated. However, I cannot get it to work.
(name = id_name, id = id_name, format = (locale = (grouping = [3])))
I have an example below in Python where they use this. Could someone show me a working example?
Source: https://dash.plotly.com/datatable/typing
from dash_table.Format import Format, Scheme, Sign, Symbol  # imports used by Format(...) below, per the linked docs page

app.layout = html.Div([
dash_table.DataTable(
id='typing_formatting',
data=df_typing_formatting.to_dict('records'),
columns=[{
'id': 'city',
'name': 'City',
'type': 'text'
}, {
'id': 'max',
'name': u'Max Temperature (˚F)',
'type': 'numeric',
'format': Format(
precision=0,
scheme=Scheme.fixed,
symbol=Symbol.yes,
symbol_suffix=u'˚F'
),
# equivalent manual configuration
# 'format': {
# 'locale': {
# 'symbol': ['', '˚F']
# },
# 'specifier': '$.0f'
# }
}, {
'id': 'max_date',
'name': 'Max Temperature (Date)',
'type': 'datetime'
}, {
'id': 'min',
'name': u'Min Temperature (˚F)',
'type': 'numeric',
'format': Format(
nully='N/A',
precision=0,
scheme=Scheme.fixed,
sign=Sign.parantheses,
symbol=Symbol.yes,
symbol_suffix=u'˚F'
),
# equivalent manual configuration
# 'format': {
# 'locale': {
# 'symbol': ['', '˚F']
# },
# 'nully': 'N/A'
# 'specifier': '($.0f'
# }
'on_change': {
'action': 'coerce',
'failure': 'default'
},
'validation': {
'default': None
}
}, {
'id': 'min_date',
'name': 'Min Temperature (Date)',
'type': 'datetime',
'on_change': {
'action': 'none'
}
}]
)
])
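Editor's note on the Julia snippet above, hedged: in Julia, a one-element parenthesized assignment such as (grouping = [3]) is not a NamedTuple; it is just an assignment that evaluates to [3]. A one-element NamedTuple needs a trailing comma, (grouping = [3],), so the format value above likely never reaches the table as a nested locale object. A sketch of the column definition with the trailing commas added, plus a d3-format specifier for comma grouping (an untested assumption; type = "numeric" mirrors the Python requirement that formatting only applies to numeric columns):
# Hedged Julia sketch: note the trailing commas inside the nested NamedTuples.
column = (
    name = id_name,
    id = id_name,
    type = "numeric",
    format = (locale = (grouping = [3],), specifier = ",d"),
)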

Jupyter .save_to_html function does not store config

I'm trying to use the .save_to_html() function for a kepler.gl Jupyter notebook map.
It all works great inside Jupyter, and I can re-load the same map with a defined config.
Where it goes wrong is when I use the save_to_html() function. The map saves to an HTML file, but the configuration reverts to the basic configuration from before I customized it.
Please help! I love kepler; when I solve this little thing, it will be our absolute go-to tool.
Thanks.
I have tried changing the filters, colours, and point sizes. None of this works.
map_1 = KeplerGl(height=500, data={'data': df},config=config)
map_1
config = map_1.config
config
map_1.save_to_html(data={'data_1': df},
file_name='privateers.html',config=config)
Config
{'version': 'v1',
'config': {'visState': {'filters': [{'dataId': 'data',
'id': 'x8t9c53mf',
'name': 'time_update',
'type': 'timeRange',
'value': [1565687902187.5417, 1565775465282],
'enlarged': True,
'plotType': 'histogram',
'yAxis': None},
{'dataId': 'data',
'id': 'biysqlu36',
'name': 'user_id',
'type': 'multiSelect',
'value': ['HNc0SI3WsQfhOFRF2THnUEfmqJC3'],
'enlarged': False,
'plotType': 'histogram',
'yAxis': None}],
'layers': [{'id': 'ud6168',
'type': 'point',
'config': {'dataId': 'data',
'label': 'Point',
'color': [18, 147, 154],
'columns': {'lat': 'lat', 'lng': 'lng', 'altitude': None},
'isVisible': True,
'visConfig': {'radius': 5,
'fixedRadius': False,
'opacity': 0.8,
'outline': False,
'thickness': 2,
'strokeColor': None,
'colorRange': {'name': 'Uber Viz Qualitative 1.2',
'type': 'qualitative',
'category': 'Uber',
'colors': ['#12939A',
'#DDB27C',
'#88572C',
'#FF991F',
'#F15C17',
'#223F9A'],
'reversed': False},
'strokeColorRange': {'name': 'Global Warming',
'type': 'sequential',
'category': 'Uber',
'colors': ['#5A1846',
'#900C3F',
'#C70039',
'#E3611C',
'#F1920E',
'#FFC300']},
'radiusRange': [0, 50],
'filled': True},
'textLabel': [{'field': None,
'color': [255, 255, 255],
'size': 18,
'offset': [0, 0],
'anchor': 'start',
'alignment': 'center'}]},
'visualChannels': {'colorField': {'name': 'ride_id', 'type': 'string'},
'colorScale': 'ordinal',
'strokeColorField': None,
'strokeColorScale': 'quantile',
'sizeField': None,
'sizeScale': 'linear'}},
{'id': 'an8tbef',
'type': 'point',
'config': {'dataId': 'data',
'label': 'previous',
'color': [221, 178, 124],
'columns': {'lat': 'previous_lat',
'lng': 'previous_lng',
'altitude': None},
'isVisible': False,
'visConfig': {'radius': 10,
'fixedRadius': False,
'opacity': 0.8,
'outline': False,
'thickness': 2,
'strokeColor': None,
'colorRange': {'name': 'Global Warming',
'type': 'sequential',
'category': 'Uber',
'colors': ['#5A1846',
'#900C3F',
'#C70039',
'#E3611C',
'#F1920E',
'#FFC300']},
'strokeColorRange': {'name': 'Global Warming',
'type': 'sequential',
'category': 'Uber',
'colors': ['#5A1846',
'#900C3F',
'#C70039',
'#E3611C',
'#F1920E',
'#FFC300']},
'radiusRange': [0, 50],
'filled': True},
'textLabel': [{'field': None,
'color': [255, 255, 255],
'size': 18,
'offset': [0, 0],
'anchor': 'start',
'alignment': 'center'}]},
'visualChannels': {'colorField': None,
'colorScale': 'quantile',
'strokeColorField': None,
'strokeColorScale': 'quantile',
'sizeField': None,
'sizeScale': 'linear'}},
{'id': 'ilpixu9',
'type': 'arc',
'config': {'dataId': 'data',
'label': ' -> previous arc',
'color': [146, 38, 198],
'columns': {'lat0': 'lat',
'lng0': 'lng',
'lat1': 'previous_lat',
'lng1': 'previous_lng'},
'isVisible': True,
'visConfig': {'opacity': 0.8,
'thickness': 2,
'colorRange': {'name': 'Global Warming',
'type': 'sequential',
'category': 'Uber',
'colors': ['#5A1846',
'#900C3F',
'#C70039',
'#E3611C',
'#F1920E',
'#FFC300']},
'sizeRange': [0, 10],
'targetColor': None},
'textLabel': [{'field': None,
'color': [255, 255, 255],
'size': 18,
'offset': [0, 0],
'anchor': 'start',
'alignment': 'center'}]},
'visualChannels': {'colorField': None,
'colorScale': 'quantile',
'sizeField': None,
'sizeScale': 'linear'}},
{'id': 'inv52pp',
'type': 'line',
'config': {'dataId': 'data',
'label': ' -> previous line',
'color': [136, 87, 44],
'columns': {'lat0': 'lat',
'lng0': 'lng',
'lat1': 'previous_lat',
'lng1': 'previous_lng'},
'isVisible': False,
'visConfig': {'opacity': 0.8,
'thickness': 2,
'colorRange': {'name': 'Global Warming',
'type': 'sequential',
'category': 'Uber',
'colors': ['#5A1846',
'#900C3F',
'#C70039',
'#E3611C',
'#F1920E',
'#FFC300']},
'sizeRange': [0, 10],
'targetColor': None},
'textLabel': [{'field': None,
'color': [255, 255, 255],
'size': 18,
'offset': [0, 0],
'anchor': 'start',
'alignment': 'center'}]},
'visualChannels': {'colorField': None,
'colorScale': 'quantile',
'sizeField': None,
'sizeScale': 'linear'}}],
'interactionConfig': {'tooltip': {'fieldsToShow': {'data': ['time_ride_start',
'user_id',
'ride_id']},
'enabled': True},
'brush': {'size': 0.5, 'enabled': False}},
'layerBlending': 'normal',
'splitMaps': []},
'mapState': {'bearing': 0,
'dragRotate': False,
'latitude': 49.52565611453996,
'longitude': 6.2730441822977845,
'pitch': 0,
'zoom': 9.244725880765998,
'isSplit': False},
'mapStyle': {'styleType': 'dark',
'topLayerGroups': {},
'visibleLayerGroups': {'label': True,
'road': True,
'border': False,
'building': True,
'water': True,
'land': True,
'3d building': False},
'threeDBuildingColor': [9.665468314072013,
17.18305478057247,
31.1442867897876],
'mapStyles': {}}}}
Expected:
Fully configured map, as in the Jupyter widget.
Actual:
Colors and filters are not configured. Size and position of the map are carried over, so if I save it while looking at an empty area, the HTML file opens looking at that same spot.
In the Jupyter user guide for kepler.gl under the save section
# this will save current map
map_1.save_to_html(file_name='first_map.html')
# this will save map with provided data and config
map_1.save_to_html(data={'data_1': df}, config=config, file_name='first_map.html')
# this will save map with the interaction panel disabled
map_1.save_to_html(file_name='first_map.html', read_only=True)
So it looks like it's either a bug with the configuration parameter, or you are making changes to the map configuration after you set it equal to config. This would be fixed if you set
map_1.save_to_html(data={'data_1': df},
file_name='privateers.html',config=map_1.config)
I think it is a bug (or a feature?) that happens when you use the same cell to save the map configuration, or when you haven't actually rendered the map yet. Generally, the config only exists after you really print the map out.
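A hedged illustration of that ordering, using the question's own names: render the map in one cell, customize it in the widget, and only then read map_1.config in a later cell.
# Cell 1: build and render the map; the live config exists only after rendering.
map_1 = KeplerGl(height=500, data={'data': df})
map_1
# Cell 2, run after customizing the map in the widget:
config = map_1.config
map_1.save_to_html(data={'data': df}, config=config, file_name='privateers.html')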
The problem, as far as I see it (and how I solved a similar problem), is that you (1) named your 'data' key one way when instancing the map, but (2) told it something different when saving the HTML.
map_1 = KeplerGl(height=500, data={'data': df},config=config)
map_1.save_to_html(data={'data_1': df}, file_name='privateers.html',config=config)
Name both keys the same and your HTML file will use the correct configuration.
Had this issue as well. Solved it by converting all pandas column dtypes to ones that are JSON-serializable, e.g. converting a datetime column from dtype <m8[ns] to object.
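A hedged sketch of that conversion; the column name time_update is borrowed from the question's config and is only illustrative:
# Convert datetime64 columns to strings so the data is JSON-serializable
# and the saved HTML keeps the configured filters.
df['time_update'] = df['time_update'].astype(str)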

Failing to generate scalar predictions from NuPIC CLA model

I'm failing to get scalar predictions out of a CLA model.
Here's a self-contained example. It uses config to create a model using the ModelFactory, trains it on a simple data set ({input_field: X, output_field: X}, where X is random between 0 and 1), and then attempts to extract predictions with input of the form {input_field: X, output_field: None}.
#!/usr/bin/python
import random
from nupic.frameworks.opf.modelfactory import ModelFactory
config = {
'model': "CLA",
'version': 1,
'modelParams': {
'inferenceType': 'NontemporalClassification',
'sensorParams': {
'verbosity' : 0,
'encoders': {
'_classifierInput': {
'classifierOnly': True,
'clipInput': True,
'fieldname': u'output_field',
'maxval': 1.0,
'minval': 0.0,
'n': 100,
'name': '_classifierInput',
'type': 'ScalarEncoder',
'w': 21},
u'input_field': {
'clipInput': True,
'fieldname': u'input_field',
'maxval': 1.0,
'minval': 0.0,
'n': 100,
'name': u'input_field',
'type': 'ScalarEncoder',
'w': 21},
},
},
'spEnable': False,
'tpEnable' : False,
'clParams': {
'regionName' : 'CLAClassifierRegion',
'clVerbosity' : 0,
'alpha': 0.001,
'steps': '0',
},
},
}
model = ModelFactory.create(config)
ROWS = 100
def sample():
    return random.uniform(0.0, 1.0)

# training data is {input_field: X, output_field: X}
def training():
    for r in range(ROWS):
        value = sample()
        yield {"input_field": value, "output_field": value}

# testing data is {input_field: X, output_field: None} (want output_field predicted)
def testing():
    for r in range(ROWS):
        value = sample()
        yield {"input_field": value, "output_field": None}

model.enableInference({"predictedField": "output_field"})
model.enableLearning()
for row in training():
    model.run(row)
# model.finishLearning() fails in clamodel.py
model.disableLearning()
for row in testing():
    result = model.run(row)
    print result.inferences  # shows None as the predicted value
The output I see is high confidence None rather than what I expect, which is something close to the input value (since the model was trained on input==output).
{'multiStepPredictions': {0: {None: 1.0}}, 'multiStepBestPredictions': {0: None}, 'anomalyScore': None}
{'multiStepPredictions': {0: {None: 0.99999999999999978}}, 'multiStepBestPredictions': {0: None}, 'anomalyScore': None}
{'multiStepPredictions': {0: {None: 1.0000000000000002}}, 'multiStepBestPredictions': {0: None}, 'anomalyScore': None}
{'multiStepPredictions': {0: {None: 1.0}}, 'multiStepBestPredictions': {0: None}, 'anomalyScore': None}
'NontemporalClassification' seems to be the right inferenceType, because it's a simple classification. But does that work with scalars?
Is there a different way of expressing that I want a prediction other than output_field=None?
I need output_field to be classifierOnly=True. Is there related configuration missing or wrong?
Thanks for your help.
Here's the working example. The key changes were:
1. Use TemporalMultiStep, as recommended by @matthew-taylor (adding the required parameters).
2. Use "implementation": "py" in clParams. My values are in the range 0.0-1.0, and the fast (cpp) classifier always returns None for values in that range; the same code with the "py" implementation returns valid values. Change the range to 10-100 and the fast algorithm returns valid values as well. It was this change that finally produced non-None results.
3. Less significant than #2: to improve the results, I repeat each training row a few times to let it sink in, which makes sense for training.
To see the classifier bug, comment out the "implementation": "py" line in clParams. The results will be None. Then change MIN_VAL to 10 and MAX_VAL to 100 and watch the results come back.
#!/usr/bin/python
import random
from nupic.frameworks.opf.modelfactory import ModelFactory
from nupic.support import initLogging
from nupic.encoders import ScalarEncoder
import numpy
MIN_VAL = 0.0
MAX_VAL = 1.0
config = {
'model': "CLA",
'version': 1,
'predictAheadTime': None,
'modelParams': {
'clParams': {
"implementation": "py", # cpp version fails with small numbers
'regionName' : 'CLAClassifierRegion',
'clVerbosity' : 0,
'alpha': 0.001,
'steps': '1',
},
'inferenceType': 'TemporalMultiStep',
'sensorParams': {
'encoders': {
'_classifierInput': {
'classifierOnly': True,
'clipInput': True,
'fieldname': 'output_field',
'maxval': MAX_VAL,
'minval': MIN_VAL,
'n': 200,
'name': '_classifierInput',
'type': 'ScalarEncoder',
'w': 21},
u'input_field': {
'clipInput': True,
'fieldname': 'input_field',
'maxval': MAX_VAL,
'minval': MIN_VAL,
'n': 100,
'name': 'input_field',
'type': 'ScalarEncoder',
'w': 21},
},
'sensorAutoReset' : None,
'verbosity' : 0,
},
'spEnable': True,
'spParams': {
'columnCount': 2048,
'globalInhibition': 1,
'spatialImp': 'cpp',
},
'tpEnable' : True,
'tpParams': { 'activationThreshold': 12,
'cellsPerColumn': 32,
'columnCount': 2048,
'temporalImp': 'cpp',
},
'trainSPNetOnlyIfRequested': False,
},
}
# end of config dictionary
model = ModelFactory.create(config)
TRAINING_ROWS = 100
TESTING_ROWS = 100
def sample(r=0.0):
    return random.uniform(MIN_VAL, MAX_VAL)

def training():
    for r in range(TRAINING_ROWS):
        value = sample(r / TRAINING_ROWS)
        for rd in range(5):
            yield {
                "input_field": value,
                "output_field": value,
                '_reset': 1 if (rd == 0) else 0,
            }

def testing():
    for r in range(TESTING_ROWS):
        value = sample()
        yield {
            "input_field": value,
            "output_field": None,
        }

model.enableInference({"predictedField": "output_field"})
for row in training():
    model.run(row)
for row in testing():
    result = model.run(row)
    prediction = result.inferences['multiStepBestPredictions'][1]
    if prediction is None:
        print "Input %f, Output None" % (row['input_field'])
    else:
        print "Input %f, Output %f (err %f)" % (row['input_field'], prediction, prediction - row['input_field'])
The inferenceType you want is TemporalMultiStep.
See this example for a complete walkthrough.