How to concat two bar charts in Altair with space between series? - slider

I have the following code to generate two bar charts. The first one is a "Central" scenario that needs to be always visible. The second represents multiple stress scenarios with values depending on two sliders.
My problem is how to combine the two charts while leaving space between the two series, so that both remain visible in all cases (like a grouped bar chart).
Here is my code :
import altair as alt
from vega_datasets import data
pvfp=Res.loc[(Res.Item=="PVFP")&(Res.annee>0)]
base = alt.Chart(pvfp, width=500, height=300).mark_bar(color="Green").encode(
x=alt.X('annee:Q'),
y='valeur:Q',
tooltip="valeur:Q"
)
central = alt.Chart(pvfp.loc[(Res.TS=='Central')&(Res.TRA=='Central')], width=500, height=300).mark_bar().encode(
x=alt.X('annee:Q'),
y='valeur:Q',
tooltip="valeur:Q"
)
# A slider filter
TRA_slider = alt.binding_range(min=-40, max=20, step=10,name="Sensi TRA :")
TS_slider = alt.binding_range(min=-20, max=20, step=5,name="Sensi TS : ")
slider1 = alt.selection_single(bind=TRA_slider, fields=['TRA2'],init={'TRA2': 0})
slider2 = alt.selection_single(bind=TS_slider, fields=['TS2'],init={'TS2': 0})
filter_TRA = base.add_selection(
slider1,slider2
).transform_filter(
slider1&slider2
).properties(title="Sensi_TRA")
central + filter_TRA
And a view of the chart I obtain for now :
If you have any idea of a way to do that, I would be very grateful.
UPDATE :
Here is a reproducible example of the same problem.
import altair as alt
import pandas as pd
from vega_datasets import data
dataset = data.population.url
source=pd.read_json(dataset)
source2=df.loc[df.year==1900]
pink_blue = alt.Scale(domain=('Male', 'Female'),
range=["steelblue", "salmon"])
slider = alt.binding_range(min=1900, max=2000, step=10)
select_year = alt.selection_single(name="year", fields=['year'],
bind=slider, init={'year': 2000})
chart1 = alt.Chart(source).mark_bar().encode(
x=alt.X('age:O', title=None),
y=alt.Y('people:Q', scale=alt.Scale(domain=(0, 12000000))),
).properties(
width=300
).add_selection(
select_year
).transform_filter(
select_year
)
chart2 = alt.Chart(source2).mark_bar(color="green").encode(
x=alt.X('age:O', title=None),
y=alt.Y('people:Q', scale=alt.Scale(domain=(0, 12000000))),
)
chart1+chart2
As described, what I would like is to find a way to separate the two series and obtain an output like the one in the example mentioned by @joelostblom.
I hope this makes it clearer.

You can do this with a combination of bandPaddingInner and xOffset. For example:
import altair as alt
import pandas as pd
from vega_datasets import data
# Load the population data; the rows for 1900 form the fixed reference series.
dataset = data.population.url
source = pd.read_json(dataset)
source2 = source[source.year == 1900]

pink_blue = alt.Scale(
    domain=('Male', 'Female'),
    range=["steelblue", "salmon"],
)

# Slider-bound selection on the `year` field, starting at 2000.
slider = alt.binding_range(min=1900, max=2000, step=10)
select_year = alt.selection_single(
    name="year",
    fields=['year'],
    bind=slider,
    init={'year': 2000},
)

# Slider-driven series: bars are offset to the left of the band position.
selected_bars = alt.Chart(source).mark_bar(xOffset=-3)
selected_bars = selected_bars.encode(
    x=alt.X('age:O', title=None),
    y=alt.Y('people:Q', scale=alt.Scale(domain=(0, 12000000))),
)
chart1 = (
    selected_bars
    .properties(width=300)
    .add_selection(select_year)
    .transform_filter(select_year)
)

# Fixed 1900 series: green bars offset to the right, so both series show.
reference_bars = alt.Chart(source2).mark_bar(xOffset=5, color="green")
chart2 = reference_bars.encode(
    x=alt.X('age:O', title=None),
    y=alt.Y('people:Q', scale=alt.Scale(domain=(0, 12000000))),
)

# Layer both series; the larger inner band padding leaves room for the
# two offset bars within each age band.
alt.layer(chart1, chart2).configure_scale(bandPaddingInner=0.6)

Related

3d interactive graph linked with three sliders

I’m trying to create a 3d interactive graph linked with three sliders. I used dash plotly. But when I run this code, I get a blank 2d graph with sliders. If anyone can help me to find mistakes in my code, it would be very helpful. Thank you
I coded this in python.
Below is my data along with the code:
|A |C |B|
|191|11870402.57|150927.156|
|194|11534176.96|150926.613|
|200|8791715.569|150309.893|
|219|9058784.693|130344.409|
|193|11710374.76|150993.204|
|230|8966576.793|121803.204|
|196|11563137.82|147352.525|
|197|11559778.19|147360.662|
|232|8145250.015|134850.363|
|230|8960357.94|122119.87|
|241|8343604.908|118177.929|
'''
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import pandas as pd
app = Dash(name)
app.layout = html.Div([
html.H4(‘Illustrations’),
dcc.Graph(id=“graph_scatter”),
html.P(“A:”),
dcc.Slider(
id=‘3d-scatter-plot-x-range-slider’,
min=df[‘A’].min(), max=df[‘A’].max(),
value=df[‘A’].max()),
html.P(“B:”),
dcc.Slider(
id=‘3d-scatter-plot-y-range-slider’,
min=df[‘B’].min(), max=df[‘B’].max(),
value=df[‘B’].max()),
html.P(“C:”),
dcc.Slider(
id=‘3d-scatter-plot-z-range-slider’,
min=df[‘C’].min(), max=df[‘C’].max(),
value=df[‘C’].max())
])
#app.callback(
Output(‘graph’, ‘figure’),
[Input(‘3d-scatter-plot-x-range-slider’, ‘value’),
Input(‘3d-scatter-plot-y-range-slider’, ‘value’),
Input(‘3d-scatter-plot-z-range-slider’, ‘value’)
])
def update_bar_chart(slider_range_x, slider_range_y, slider_range_z):
df = pd.read_csv(‘ABC.csv’) # replace with your own data source
low_x, high_x = slider_range_x
low_y, high_y = slider_range_y
low_z, high_z = slider_range_z
mask = (df.A > low_x) &
(df.A < high_x) & (df.B > low_y) & (df.B < high_y) & (df.C > low_z) & (df.C <
high_z)
fig = px.scatter_3d(mask,
x ='A', z='C',y='B')
return fig
if name == “main”:
app.run_server(debug=False)
'''
I see some problems in your code.
First: the id you set on your dcc.Graph is graph_scatter, but in your callback you refer to it as graph.
Second: you are using a Slider, so each slider supplies a single value: you cannot change low_x, low_y, low_z, only high_x, high_y, high_z. That means you cannot use something like low_x, high_x = slider_range_x. Based on your code, I revised it as below:
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import pandas as pd
import dash_bootstrap_components as dbc
# Load the data once at module level: both the layout (slider bounds) and
# the callback below read it.
df = pd.read_csv('ABC.csv')  # replace with your own data source

app = Dash(__name__, external_stylesheets=[dbc.themes.LUX])

# One slider per column; each spans the column's range and starts at its
# maximum.
app.layout = html.Div([
    html.H4('Illustrations'),
    dcc.Graph(id='graph_scatter', figure={}),
    html.P('A:'),
    dcc.Slider(
        id='3d-scatter-plot-x-range-slider',
        min=df['A'].min(), max=df['A'].max(),
        value=df['A'].max()),
    html.P('B:'),
    dcc.Slider(
        id='3d-scatter-plot-y-range-slider',
        min=df['B'].min(), max=df['B'].max(),
        value=df['B'].max()),
    html.P('C:'),
    dcc.Slider(
        id='3d-scatter-plot-z-range-slider',
        min=df['C'].min(), max=df['C'].max(),
        value=df['C'].max()),
])


# The Output id must match the dcc.Graph id declared in the layout.
@app.callback(
    Output('graph_scatter', 'figure'),
    [Input('3d-scatter-plot-x-range-slider', 'value'),
     Input('3d-scatter-plot-y-range-slider', 'value'),
     Input('3d-scatter-plot-z-range-slider', 'value')],
)
def update_bar_chart(slider_range_x, slider_range_y, slider_range_z):
    """Return a 3D scatter of the rows at or below the three slider values.

    Each argument is the current value of one slider and is used as the
    upper bound for column A, B and C respectively.
    """
    high_x = slider_range_x
    high_y = slider_range_y
    high_z = slider_range_z
    # Inclusive bounds: the sliders start at each column's maximum, so a
    # strict `<` would drop the maximum rows from the initial view.
    dff = df[(df['A'] <= high_x) & (df['B'] <= high_y) & (df['C'] <= high_z)]
    return px.scatter_3d(dff, x='A', y='B', z='C')


if __name__ == "__main__":
    app.run_server(debug=False)

Altering the X-axis in Altair

I'd like to fill the charts with selectors like the example below. Any tips on how to get this to work in a faceted chart?
np.random.seed(42)
source = pd.DataFrame(np.cumsum(np.random.rand(8, 4), 0).round(2),
columns=['A', 'B', 'C', 'D'], index=pd.RangeIndex(8, name='x'))
source = source.reset_index().melt('x', var_name='category', value_name='y')
xRange= pd.DataFrame(np.linspace(min(source['x']), max(source['x']), num=100), columns=['x'])
pts = alt.selection_multi(fields=['x'], nearest=True, on='click',empty='none')
# The basic line
main = alt.Chart(source).mark_line(interpolate='basis').encode(
x='x:Q',
y='y:Q',
).transform_filter(
alt.FieldEqualPredicate(field='category', equal='A')
)
line = alt.Chart(source).mark_line(color='Maroon').encode(
x='x:Q',
y='y:Q',
).transform_filter(
alt.FieldEqualPredicate(field='category', equal='B')
)
# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors = alt.Chart(xRange).mark_rule(size=2).encode(
x='x:Q',
#y='y:Q',
#opacity=alt.value(0.4),
opacity = alt.condition(pts, alt.value(1.0), alt.value(0.2))
).add_selection(pts)
position = alt.Chart(xRange).mark_text(
align='right', dy=140, dx=-8, fontSize=14).encode(
x=alt.X('x'),
text=alt.Text('x',format='.1f')
).transform_filter(pts)
alt.vconcat(
main + selectors + position,
line + selectors + position
)
But ideally I would use facet; however, I have not found a way around the restriction that you can only use a single DataFrame/source. Is there a way to use alt.sequence or impute to generate additional points on the x-axis?
pts = alt.selection_multi(fields=['x'], nearest=True, on='click',empty='none')
# The basic line
line = alt.Chart().mark_line(interpolate='basis').encode(
x='x:Q',
y='y:Q',
)
# Transparent rules across the chart.
rules = alt.Chart().mark_rule(size=2).encode(
x='x:Q',
opacity = alt.condition(pts, alt.value(1.0), alt.value(0.3))
).add_selection(pts)
text = alt.Chart().mark_text(
align='right', dy=140, dx=-8, fontSize=14).encode(
x=alt.X('x'),
text=alt.Text('x',format='.1f')
).transform_filter(pts)
alt.layer(line, rules, text, data=source).facet(
'category:N',
columns=2
)
You can use the sequence generator. It is almost the same to what you had already:
import numpy as np
import pandas as pd
import altair as alt
# Build a reproducible long-form frame with columns x, category, y.
np.random.seed(42)
random_steps = np.random.rand(8, 4)
wide = pd.DataFrame(
    random_steps.cumsum(axis=0).round(2),
    columns=['A', 'B', 'C', 'D'],
    index=pd.RangeIndex(8, name='x'),
)
source = wide.reset_index().melt(
    id_vars='x', var_name='category', value_name='y'
)

# Sequence generator used as the data source for the rules and the label,
# in place of a second DataFrame.
xRange = alt.sequence(0, 7.1, 0.1, as_='x')

pts = alt.selection_multi(
    fields=['x'],
    nearest=True,
    on='mouseover',
    empty='none',
)

# The basic line; it has no data of its own and takes the layer's data.
line = alt.Chart().mark_line(interpolate='linear').encode(x='x:Q', y='y:Q')

# Rules across the chart, drawn from the generated sequence; the selected
# rule is opaque, the others faint.
rule_opacity = alt.condition(pts, alt.value(1.0), alt.value(0.3))
rules = (
    alt.Chart(xRange)
    .mark_rule(size=2)
    .encode(x='x:Q', opacity=rule_opacity)
    .add_selection(pts)
)

# Text label showing the x value of the selected rule.
label_marks = alt.Chart(xRange).mark_text(
    align='right', dy=140, dx=-8, fontSize=14
)
text = label_marks.encode(
    x=alt.X('x:Q'),
    text=alt.Text('x:Q', format='.1f'),
).transform_filter(pts)

# Layer the three charts on the shared source, then facet by category.
alt.layer(line, rules, text, data=source).facet(
    'category:N',
    columns=2,
)

Filter Dataframe Using Bokeh Dropdown Widget/CustomJS

I have to make a standalone html dashboard so I'm trying to figure out how to add a callback to a bokeh dropdown widget using CustomJS. Problem is even after consulting other posts on variations on the subject I still can't figure it out. Any help would be appreciated! Ultimately, I would use the dropdown to filter a stacked bar chart, but I want to take a stab at figuring that out on my own after messing with filtering the datatable first.
I've consulted Filtering dataframe using Bokeh/Widget/Callback, Bokeh datatable filtering inconsistency, and Python bokeh CustomJS callback update DataTable widget. Additionally, I've been going through the docs at https://docs.bokeh.org/en/1.3.4/docs/user_guide/interaction/callbacks.html#userguide-interaction-jscallbacks.
import pandas as pd
from bokeh.models.widgets import Dropdown
from bokeh.layouts import widgetbox
from bokeh.models import ColumnDataSource, DataTable, TableColumn, CustomJS
from bokeh.io import show, output_file, output_notebook, reset_output
raw_data = {'ORG': ['APPLE', 'ORANGE', 'MELON'],
'APPROVED': [5, 10, 15],
'CREATED': [1, 3, 5],
'INPROCESS': [4,2,16]}
df = pd.DataFrame(raw_data)
# create list of orgs to use later
org_l = list(df['ORG'].unique())
# create CDS for source
src = ColumnDataSource(df)
# create cols
table_columns = [TableColumn(field = Ci, title = Ci) for Ci in df.columns]
# create filtered table
filtered_df = df.loc[df['ORG']=='f']
# create CDS for filtered source
new_src = ColumnDataSource(filtered_df)
# create dropdown
dropdown = Dropdown(label="Dropdown button", button_type="warning", menu = org_l)
callback_code = """"
var data = src.data;
var new_data = new_src.data;
var f = cb_obj.value;
var list = org_l;
if var i = org_list[i] {
new_src.data = src.data
}
"""
callback=CustomJS(args=dict(dropdown = dropdown,source=src),
code=callback_code)
# create table
member_table = DataTable(source = new_src, columns = table_columns)
dropdown.js_on_change('value', callback)
show(widgetbox(dropdown, member_table))
''''
The only Bokeh objects accessible as named variables in the JS code are those that you explicitly include in the args dict. That is the purpose of the args dict: to automagically make the JavaScript counterparts of Python Bokeh objects easily accessible. Browsers do not know anything about Python or the Python variables in your script. You reference src and new_src in your JS code, but have not passed either of them in the args dict. Also, the plain Python value org_l will need to be included literally in the JS code text, with string text formatting, e.g. with the % operator.
You also have a syntax error in the JS code, which is reported in the Browser's JS console (which you should become familiar with as it is the best tool for debugging these issues). This is not valid JS code:
if var i = org_list[i] {
new_src.data = src.data
}
Ok with a lot of trial and error and with pointers from bigreddot I got the below to work.
import pandas as pd
from bokeh.models.widgets import Select
from bokeh.layouts import widgetbox
from bokeh.models import ColumnDataSource, DataTable, TableColumn, CustomJS
from bokeh.io import show, output_file, output_notebook, reset_output
from bokeh.layouts import row, column, layout
raw_data = {
    'ORG': ['APPLE', 'ORANGE', 'MELON'],
    'APPROVED': [5, 10, 15],
    'CREATED': [1, 3, 5],
    'INPROCESS': [4, 2, 16],
}
df = pd.DataFrame(raw_data)

# create CDS for source
src1 = ColumnDataSource(df)

# create cols
table_columns1 = [TableColumn(field=Ci, title=Ci) for Ci in df.columns]

# original data table
data_table1 = DataTable(source=src1,
                        columns=table_columns1, width=400, height=280)

# create empty dataframe to hold variables based on selected ORG value
df2 = pd.DataFrame({'status': ['APPROVED', 'CREATED', 'INPROCESS'],
                    'count': [float('nan'), float('nan'), float('nan')]})

# create CDS for empty dataframe
src2 = ColumnDataSource(df2)

# create cols
table_columns2 = [TableColumn(field=Ci, title=Ci) for Ci in df2.columns]

# table showing the counts for the selected ORG (this is what gets displayed)
data_table2 = DataTable(source=src2,
                        columns=table_columns2, width=400, height=280)

# Only the objects passed through `args` are visible inside the JS code.
callback = CustomJS(args=dict(src1=src1, src2=src2), code='''
    var count = ['APPROVED', 'CREATED', 'INPROCESS'];
    if (cb_obj.value != 'Please choose...') {
        // Copy the selected ORG's value for each status into src2.
        var org = src1.data['ORG'];
        var ind = org.indexOf(cb_obj.value);
        for (var i = 0; i < count.length; i++) {
            src2.data['count'][i] = src1.data[count[i]][ind];
        }
    }
    else {
        // Placeholder selected: clear the counts, keep the status labels.
        for (var i = 0; i < count.length; i++) {
            src2.data['count'][i] = NaN;
        }
    }
    src2.change.emit();
''')

options = ['Please choose...'] + list(src1.data['ORG'])
select = Select(title='Test', value=options[0], options=options)
select.js_on_change('value', callback)
show(column(select, data_table2))

Bokeh: Bad date format?

would anyone advise me how to adjust the X axis to better display the date on this graph?
from math import pi
import pandas as pd
from bokeh.io import show
from bokeh.models import LinearColorMapper, BasicTicker, PrintfTickFormatter, ColorBar
from bokeh.plotting import figure
#cesta k souboru
path = "C://Users//Zemi4//Desktop//zpr3//all2.csv"
#nacteni dataframu
data = pd.read_csv(path, delimiter = ",")
data['Cas'] = data['Cas'].astype(str)
data = data.set_index('Cas')
data.columns.name = 'Mistnost'
times = list(data.index)
rooms = list(data.columns)
df = pd.DataFrame(data.stack(), columns=['float']).reset_index()
colors = ['#440154', '#404387', '#29788E', '#22A784', '#79D151', '#FDE724', '#FCFEA4', '#FBA40A', '#DC5039']
mapper = LinearColorMapper(palette=colors, low=df.float.min(), high=df.float.max())
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
p = figure(title="Heatmap ({0} - {1})".format(times[0], times[-1]),
x_range=times, y_range=list(reversed(rooms)),
x_axis_location="above", plot_width=1500, plot_height=900,
tools=TOOLS, toolbar_location='below',
tooltips=[('Time: ', '#Cas'), ('Temperature: ', '#float'), ('Room: ', '#Mistnost')],
x_axis_type='datetime')
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "5pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3
p.rect(x="Cas", y="Mistnost", width=1, height=1,
source=df,
fill_color={'field': 'float', 'transform': mapper},
line_color=None)
color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="5pt",
ticker=BasicTicker(desired_num_ticks=len(colors)),
formatter=PrintfTickFormatter(format="%f"),
label_standoff=6, border_line_color=None, location=(0, 0))
p.add_layout(color_bar, 'right')
show(p) # show the pl
Try: p.xaxis[0].ticker.desired_num_ticks = <number_ticks_you_want_to_display>.
Or apply a specific ticker (see Bokeh docs) like you did for the ColorBar.

Concatenate Data From URLS Recursively Inside one DataFrame

I'm trying to create one dataframe with data from multiple URLs I'm scraping. The code works; however, I'm unable to store the data in one DataFrame recursively. The DataFrame (called frame) is replaced with a new URL's data each time, rather than having the new data concatenated to the same frame. Thank you, I deeply appreciate your help!
import urllib
import re
import json
import pandas
import pylab
import numpy
import matplotlib.pyplot
from pandas import *
from pylab import *
from threading import Thread
import sqlite3
urls = ['http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1176131' , 'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=795226', 'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1176131' , 'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1807944', 'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=277459' , 'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1076779' , 'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=971546']
i=0
regex = '<p class="commentText">(.+?)</p>'
regex2 = '<strong>Easiness</strong><span>(.+?)</span></p>'
regex3 = 'Helpfulness</strong><span>(.+?)</span></p>'
regex4 = 'Clarity</strong><span>(.+?)</span></p>'
regex5 = 'Rater Interest</strong><span>(.+?)</span></p>'
regex6 = '<div class="date">(.+?)</div>'
regex7 = '<div class="class"><p style="word-wrap:break-word;">(.+?)</p>'
regex8 = '<meta name="prof_name" content="(.+?)"/>'
pattern = re.compile(regex)
easiness = re.compile(regex2)
helpfulness = re.compile(regex3)
clarity = re.compile(regex4)
interest = re.compile(regex5)
date = re.compile(regex6)
mathclass = re.compile(regex7)
prof_name = re.compile(regex8)
while i < len(urls):
htmlfile = urllib.urlopen(urls[i])
htmltext = htmlfile.read()
content = re.findall(pattern,htmltext)
Easiness = re.findall(easiness,htmltext)
Helpfulness = re.findall(helpfulness, htmltext)
Clarity = re.findall(clarity, htmltext)
Interest = re.findall(interest, htmltext)
Date = re.findall(date, htmltext)
Class = re.findall(mathclass, htmltext)
PROFNAME=re.findall(prof_name, htmltext)
i+=1
frame = DataFrame({'Comments': content, 'Easiness': Easiness, 'Helpfulness': Helpfulness,
'Clarity': Clarity, 'Rater Interest': Interest, 'Class': Class,
'Date': Date[1:len(Date)], 'Professor': PROFNAME[0]})
print frame
Use pd.concat:
import pandas as pd  # bind the `pd` alias used below

# Build one DataFrame per URL, then concatenate them once after the loop.
frames = []
for url in urls:
    # urllib.urlopen is the Python 2 API, as used in the question.
    htmlfile = urllib.urlopen(url)
    htmltext = htmlfile.read()
    content = re.findall(pattern, htmltext)
    Easiness = re.findall(easiness, htmltext)
    Helpfulness = re.findall(helpfulness, htmltext)
    Clarity = re.findall(clarity, htmltext)
    Interest = re.findall(interest, htmltext)
    Date = re.findall(date, htmltext)
    Class = re.findall(mathclass, htmltext)
    PROFNAME = re.findall(prof_name, htmltext)
    frames.append(pd.DataFrame({
        'Comments': content, 'Easiness': Easiness, 'Helpfulness': Helpfulness,
        'Clarity': Clarity, 'Rater Interest': Interest, 'Class': Class,
        'Date': Date[1:], 'Professor': PROFNAME[0]}))

# pd.concat returns a new frame, so keep the result; ignore_index renumbers
# the rows instead of repeating each per-URL index.
frame = pd.concat(frames, ignore_index=True)
You are overwriting your frame with each iteration of the loop. As Phillip Cloud suggested, you can make a list of frames that you append with each loop. I simplified your code differently, but I think this gives you what you want.
import urllib
import re
import pandas as pd
urls = [
    'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1176131',
    'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=795226',
    'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1176131',
    'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1807944',
    'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=277459',
    'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1076779',
    'http://www.ratemyprofessors.com/ShowRatings.jsp?tid=971546',
]

# One compiled pattern per field to extract from each page.
regex = {
    'pattern': re.compile('<p class="commentText">(.+?)</p>'),
    'easiness': re.compile('<strong>Easiness</strong><span>(.+?)</span></p>'),
    'helpfulness': re.compile('Helpfulness</strong><span>(.+?)</span></p>'),
    'clarity': re.compile('Clarity</strong><span>(.+?)</span></p>'),
    'interest': re.compile('Rater Interest</strong><span>(.+?)</span></p>'),
    'date': re.compile('<div class="date">(.+?)</div>'),
    'mathclass': re.compile('<div class="class"><p style="word-wrap:break-word;">(.+?)</p>'),
    'prof_name': re.compile('<meta name="prof_name" content="(.+?)"/>'),
}

# Start every field with an empty list, keyed like `regex`.
d = {field: [] for field in regex}

# For each page, append that page's list of matches to every field.
for url in urls:
    htmltext = urllib.urlopen(url).read()
    for field, compiled in regex.iteritems():
        d[field].append(compiled.findall(htmltext))

frame = pd.DataFrame(d)  # Dump the dict into a DataFrame
print(frame)