Having a hard time overlaying graphs with plotly.graph_objects - matplotlib

I tried to overlay a 'threshold' line on a bar graph but didn't manage to get a clean graph.
Here is my pandas DataFrame:
import pandas as pd
# Nutrient table: 'Nutriment' holds the row labels, every other column holds
# the values of one ingredient.
df = pd.DataFrame(
    {
        'Nutriment': ['Glucides (g)', 'Lipides (g)', 'Protéines (g)'],
        'ail': [4, 0.056, 7.82],
        'basilic': [0.05, 0.009, 0.42],
        'carotte': [2.6, 0.1, 3.44],
        'citron vert': [0.063, 0.0, 0.14],
    }
)
and this is what I tried to do :
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Attempt from the question: stacked bars per nutrient plus one horizontal
# "recommended minimum" line per nutrient.
fig = make_subplots()
# Declare a second x axis laid over the first one; the bars are attached to it.
fig.update_layout(xaxis2={'anchor': 'y', 'overlaying': 'x', 'side': 'bottom'})

# Every column after 'Nutriment' is an ingredient.
ingredients = df.columns[1:]

# One colour per ingredient: green and blue channels start at 100 and grow by
# 40 for each following ingredient.
clrs = {
    ingredient: f'rgb(0,{100 + 40 * rank},{100 + 40 * rank})'
    for rank, ingredient in enumerate(ingredients)
}

for ingredient in ingredients:
    fig.add_trace(go.Bar(x=df['Nutriment'],
                         y=df[ingredient],
                         name=ingredient,
                         marker={'color': clrs[ingredient]},
                         xaxis='x2'))

# Threshold lines are drawn on the default (numeric) x axis.
fig.add_trace(go.Scatter(x=[0, 1], y=[4.5, 4.5], name='Apport recommandé en glucides (min)', line_color='#ff0040'))
fig.add_trace(go.Scatter(x=[1, 2], y=[1.9, 1.9], name='Apport recommandé en lipides (min)', line_color='#00ff00'))
fig.add_trace(go.Scatter(x=[2, 3], y=[1, 1], name='Apport recommandé en protéines (min)', line_color='#0000f0'))
fig.update_layout(width=800, height=500, title_text='Apports nutritionnels par portion', barmode='stack')
this is what I get
this is what I expect :
I would like to be able to see the limits (blue, green and red lines) completely
I would like to reset the origin of the grid at (0,0)
and I would like to delete the xaxis that is 0, 1, 2, 3 only to have "Glucides (g)", "Lipides(g)" and "Protéines (g)".
Could somebody help me ? Thanks !!!

I think the best way to resolve the mix of numerical and categorical axes is to encode the categorical variable as integer codes, plot everything on the numerical x-axis, and then relabel that axis with the category names at the end.
# Encode the nutrient labels as integer codes so that bars and threshold
# lines can share one numeric x axis.
df['Nutriment'] = df['Nutriment'].astype('category')
df['Nutriment_cd'] = df['Nutriment'].cat.codes
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots()

# Ingredient columns only: leave out the label column and the helper code
# column, otherwise the codes themselves would be drawn as an extra bar.
ingredients = df.columns.drop(['Nutriment', 'Nutriment_cd'])

# One colour per ingredient: green and blue channels start at 100 and grow by
# 40 for each following ingredient.
clrs = {
    ingredient: f'rgb(0,{100 + 40 * rank},{100 + 40 * rank})'
    for rank, ingredient in enumerate(ingredients)
}

for ingredient in ingredients:
    fig.add_trace(go.Bar(x=df['Nutriment_cd'],
                         y=df[ingredient],
                         name=ingredient,
                         marker={'color': clrs[ingredient]},
                         xaxis='x2'),
                  secondary_y=False)

# Each bar group is centred on its integer code, so a line from code - 0.5 to
# code + 0.5 spans the full width of that group.
fig.add_trace(go.Scatter(x=[-0.5, 0.5], y=[4.5, 4.5], name='Apport recommandé en glucides (min)', line_color='#ff0040'))
fig.add_trace(go.Scatter(x=[0.5, 1.5], y=[1.9, 1.9], name='Apport recommandé en lipides (min)', line_color='#00ff00'))
fig.add_trace(go.Scatter(x=[1.5, 2.5], y=[1, 1], name='Apport recommandé en protéines (min)', line_color='#0000f0'))
fig.update_layout(xaxis2={'anchor': 'y', 'overlaying': 'x', 'side': 'bottom'})
fig.update_layout(width=800, height=500, title_text='Apports nutritionnels par portion', barmode='stack')

# Code i corresponds to categories[i]; label the ticks from the categories so
# the mapping stays correct even if the rows are not in category order.
nutriment_labels = list(df['Nutriment'].cat.categories)
fig.update_xaxes(tickvals=list(range(len(nutriment_labels))), ticktext=nutriment_labels)
fig.show()

Related

plotly layout issues with dropdown not placed next to subplots

I am trying to place two subplots beside a dropdown but for some reason they keep going to the next row. Could someone tell me what I'm doing wrong?
import pandas as pd
pd.set_option('display.max_rows', None)
# Run this app with `python app.py`
from dash import Dash, dcc, html
#import plotly.express as px
from plotly.subplots import make_subplots
from plotly import graph_objects as go
import dash_bootstrap_components as dbc
import pandas as pd
# A single application instance. dash-bootstrap-components ships only the
# components, not the CSS: without a Bootstrap stylesheet dbc.Row / dbc.Col
# have no grid styling and their children stack vertically.
app = Dash(
    __name__,
    assets_ignore='.*ignored.*',
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)
# Colour lookup table: general theme entries, numeric keys (present both as
# str and as int), and one colour per platform name.
colors = {
'grey_plot_bg':'#353535',
'background': '#111111',
'text': 'teal',
'plottext':'crimson',
"0":"silver",
"1":"#FBEC5D",
"5":"#50C878",
"10":"#40E0D0",
"15":"#A23D60",
0:"silver",
1:"#FBEC5D",
5:"#50C878",
10:"#40E0D0",
15:"#A23D60",
'Linux':'#50C878',
'Windows':'#66D3F4',
'Primer':'#FFD700',
}
# pandas dataframe used for plots
current_merged_df = pd.DataFrame({
'student': ['scooby','doobie','doo'],
'completed' : [ 55 , 55 , 100 ]
})
# One row per (topic, platform) entry with its 'completed' value.
students_topics_earned = pd.DataFrame({
'topic': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J','A','A','B','B','C','C'],
'completed' : [ 55 , 55 , 100 , 95 , 45 , 99 , 75 , 64 , 93 , 10 , 15 , 55 , 45 , 78 , 98 , 33 ],
'platform' : ['Primer','Primer','Primer','Primer','Primer','Primer','Primer','Primer', 'Primer', 'Primer','Linux','Windows','Linux','Windows','Linux','Windows']
})
# add the average scatter polar graphs for the class
# NOTE(review): `students_category_earned` is not defined anywhere in the
# visible code, so the next line raises NameError unless it is created
# elsewhere - TODO confirm.
class_category_earned = students_category_earned.groupby(['platform','Category']).mean().round().reset_index()
# Mean 'completed' per (platform, topic), rounded, with the group keys turned
# back into regular columns.
class_topics_earned = students_topics_earned.groupby(['platform','topic']).mean().round().reset_index()
# functions to creating the plots
def student_topic_scatter_polar_graph(df, platform):
    """Build the polar scatter trace of topic completion for one platform.

    Args:
        df: DataFrame with 'platform', 'topic' and 'completed' columns.
        platform: Platform whose rows are kept; also the key used to look up
            the trace colour in the module-level ``colors`` mapping.

    Returns:
        A ``go.Scatterpolar`` trace (not a figure) with 'completed' as the
        radius and 'topic' as the angle, topics sorted ascending.

    Raises:
        KeyError: If ``platform`` has no entry in ``colors``.
    """
    print(df)  # debug output, kept so the console behaviour is unchanged
    platform_rows = df[df['platform'] == platform].sort_values(by='topic', ascending=True)
    color = colors[platform]
    return go.Scatterpolar(
        r=platform_rows.completed,
        theta=platform_rows.topic,
        fill='toself',
        name=f"{platform} - Focused Topics",
        fillcolor=color,
        opacity=0.6,
        line=dict(color=color),
        mode='markers',
    )
fig = go.Figure()
# calling the plot functions
class_linux_topic_scatter_polar_fig = student_topic_scatter_polar_graph(class_topics_earned, 'Linux')
class_windows_topic_scatter_polar_fig = student_topic_scatter_polar_graph(class_topics_earned, 'Windows')
class_Primer_topic_scatter_polar_fig = student_topic_scatter_polar_graph(class_topics_earned, 'Primer')

# 2x2 grid: the first row is a single polar subplot spanning both columns,
# the second row holds two polar subplots side by side.
class_topic_subplot = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=['Primer', 'Linux', 'Windows'],
    specs=[
        [{"rowspan": 1, "colspan": 2, "type": "polar"}, None],
        [{"rowspan": 1, "type": "polar"}, {"rowspan": 1, "type": "polar"}],
    ],
)
# Place every trace in the cell that carries its platform's title: 'Linux' is
# the second title (row 2, col 1) and 'Windows' the third (row 2, col 2).
class_topic_subplot.add_trace(class_Primer_topic_scatter_polar_fig, row=1, col=1)
class_topic_subplot.add_trace(class_linux_topic_scatter_polar_fig, row=2, col=1)
class_topic_subplot.add_trace(class_windows_topic_scatter_polar_fig, row=2, col=2)
class_topic_subplot.update_layout(
    autosize=False,
    width=800,
    height=600,
)
def populate_student_selection_dropdown(current_merged_df):
    """Return the distinct student names found in ``current_merged_df``.

    Args:
        current_merged_df: Records accepted by
            ``pd.DataFrame.from_records`` that contain a 'student' field,
            or ``None`` when no data is available.

    Returns:
        The unique 'student' values in order of first appearance, or
        ``None`` when ``current_merged_df`` is ``None``.
    """
    if current_merged_df is None:
        return None
    merged_df = pd.DataFrame.from_records(current_merged_df)
    return list(merged_df['student'].unique())
# Card holding the student multi-select dropdown.
student_selection_card = dbc.Card(
    [
        html.H6("Select Individual Students for Analysis:", className="card-text"),
        dcc.Dropdown(
            ['scoobie', 'doobie', 'doo'],
            id="student_selection_dropdown",
            multi=True,
            style={'display': False},
            persistence=True,
        ),
    ]
)
# creating card that the subplots will be in
card2 = dbc.Card(
    [
        dbc.CardBody([
            html.H4("Student Name", className="card-title"),
            dcc.Graph(figure=class_topic_subplot),
        ])
    ]
)
# second page layout
# One row with three columns: subplot card, dropdown card, subplot card.
class_analysis_layout = dbc.Container([
    dbc.Row(
        [
            dbc.Col([card2], width=3),
            dbc.Col([student_selection_card], width=3),
            dbc.Col([card2], width=3),
        ])
])
app.layout = html.Div(
    [
        class_analysis_layout
    ],
)
if __name__ == '__main__':
    app.run_server(host="0.0.0.0", port=8070, debug=True)
If you take a look at the class_analysis_layout, that is where I'm having the issue, I believe. I looked to see if there was a width property for the dropdown element but did not find one. I would have thought that setting the elements up in columns and setting the column width would do it, but no luck. Any advice?

Multiple grouped charts with altair

My data has 4 attributes: dataset (D1/D2), model (M1/M2), layer (L1/L2), scene (S1/S2). I can make a chart grouped by scenes and then merge plots horizontally and vertically (pic above).
However, I would like to have 'double grouping' by scene and dataset, i.e. merging the D1 and D2 plots by placing their blue/orange bars next to each other but with different opacity or pattern/hatch.
Basically something like this (pretend that the black traits are a hatch pattern).
Here is the code to reproduce the first plot
import numpy as np
import itertools
import argparse
import pandas as pd
import matplotlib.pyplot as plt
import os
import altair as alt
# Select the 'altair_viewer' renderer for displaying charts.
alt.renderers.enable('altair_viewer')
# Fixed seed so the synthetic data set is reproducible.
np.random.seed(0)
################################################################################
model_keys = ['M1', 'M2']
data_keys = ['D1', 'D2']
scene_keys = ['S1', 'S2']
layer_keys = ['L1', 'L2']
ys = []
models = []
dataset = []
layers = []
scenes = []
# itertools.product iterates exactly like five nested loops (scene outermost,
# repetition innermost), so the random draws are consumed in the same order.
for scene, model, data_key, layer, _ in itertools.product(
        scene_keys, model_keys, data_keys, layer_keys, range(10)):
    # Ten samples in [0, 0.1) for this (scene, model, dataset, layer) cell.
    data_y = list(np.random.rand(10) / 10)
    ys += data_y
    scenes += [scene] * len(data_y)
    models += [model] * len(data_y)
    dataset += [data_key] * len(data_y)
    layers += [layer] * len(data_y)
# ------------------------------------------------------------------------------
# Long-format table: one row per sample, labelled with its four attributes.
df = pd.DataFrame({'Y': ys,
                   'Model': models,
                   'Dataset': dataset,
                   'Layer': layers,
                   'Scenes': scenes})
# Bar layer: mean of Y per scene, coloured by scene, x-axis labels hidden.
bars = alt.Chart(df, width=100, height=90).mark_bar().encode(
# field to group columns on
x=alt.X('Scenes:N',
title=None,
axis=alt.Axis(
grid=False,
title=None,
labels=False,
),
),
# field to use as Y values and how to calculate
y=alt.Y('Y:Q',
aggregate='mean',
axis=alt.Axis(
grid=True,
title='Y',
titleFontWeight='normal',
),
),
# field to use for sorting
order=alt.Order('Scenes',
sort='ascending',
),
# field to use for color segmentation
color=alt.Color('Scenes',
legend=alt.Legend(orient='bottom',
padding=-10,
),
title=None,
),
)
# Error-bar layer over Y per scene, using the 'ci' extent.
error_bars = alt.Chart(df).mark_errorbar(extent='ci').encode(
x=alt.X('Scenes:N'),
y=alt.Y('Y:Q'),
)
# Text layer: writes mean(Y), formatted with '.1f', at the mean's position.
text = alt.Chart(df).mark_text(align='center',
baseline='line-bottom',
color='black',
dy=-5 # y-shift
).encode(
x=alt.X('Scenes:N'),
y=alt.Y('mean(Y):Q'),
text=alt.Text('mean(Y):Q', format='.1f'),
)
# Layer the three charts, then facet the layered chart by 'Layer'.
chart_base = bars + error_bars + text
chart_base = chart_base.facet(
# field to use to use as the set of columns to be represented in each group
column=alt.Column('Layer:N',
# header=alt.Header(
# labelFontStyle='bold',
# ),
title=None,
# NOTE(review): the sort order is built from the model names although the
# faceted field is 'Layer' - presumably `layers` was intended; a set also has
# no guaranteed order. TODO confirm.
sort=list(set(models)), # get unique indices
),
spacing={"row": 0, "column": 15},
)
def unique(sequence):
    """Return the items of ``sequence`` without duplicates, in first-seen order.

    Args:
        sequence: Iterable of hashable items.

    Returns:
        A new list holding each distinct item once, ordered by first
        occurrence.
    """
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(sequence))
# Build one row per model (D1 chart | D2 chart) and stack the rows vertically.
for i, m in enumerate(unique(models)):
    # Same base chart, restricted to this model and to one dataset each.
    chart_imnet = chart_base.transform_filter(
        alt.FieldEqualPredicate(field='Dataset', equal='D1'),
    ).transform_filter(
        alt.FieldEqualPredicate(field='Model', equal=m)
    )
    chart_places = chart_base.transform_filter(
        alt.FieldEqualPredicate(field='Dataset', equal='D2')
    ).transform_filter(
        alt.FieldEqualPredicate(field='Model', equal=m)
    )
    if i == 0:
        # Only the first row carries the dataset names as titles.
        title_params = {'align': 'center', 'anchor': 'middle', 'dy': -10}
        chart_imnet = chart_imnet.properties(title=alt.TitleParams('D1', **title_params))
        chart_places = chart_places.properties(title=alt.TitleParams('D2', **title_params))
    # Wrap the right-hand chart so the model name can be shown as a rotated
    # title on its right side.
    chart_places = alt.concat(
        chart_places,
        title=alt.TitleParams(
            m,
            baseline='middle',
            orient='right',
            anchor='middle',
            angle=90,
            dx=30 if i == 0 else 0,
        ),
    )
    model_row = (chart_imnet | chart_places).resolve_scale(x='shared')
    chart = model_row if i == 0 else (chart & model_row)
chart.save('test.html')
For now, I don't know a good answer, but once https://github.com/altair-viz/altair/pull/2528 is accepted you can use the xOffset encoding channel as such:
# Single grouped chart: mean(Y) per scene, with 'Dataset' driving both the
# xOffset channel and the opacity, faceted by Layer (columns) and Model (rows).
alt.Chart(df, height=90).mark_bar(tooltip=True).encode(
x=alt.X("Scenes:N"),
y=alt.Y("mean(Y):Q"),
color=alt.Color("Scenes:N"),
opacity=alt.Opacity("Dataset:N"),
xOffset=alt.XOffset("Dataset:N"),
column=alt.Column('Layer:N'),
row=alt.Row("Model:N")
).resolve_scale(x='independent')
Which will result in:
See Colab Notebook or Vega Editor
EDIT
To control the opacity and legend names one can do as such
# Same grouped chart, with an explicit opacity scale (D1 -> 0.2, D2 -> 1.0)
# and legend labels rewritten through a label expression.
alt.Chart(df, height=90).mark_bar(tooltip=True).encode(
x=alt.X("Scenes:N"),
y=alt.Y("mean(Y):Q"),
color=alt.Color("Scenes:N"),
opacity=alt.Opacity("Dataset:N",
scale=alt.Scale(domain=['D1', 'D2'],
range=[0.2, 1.0]),
legend=alt.Legend(labelExpr="datum.label == 'D1' ? 'D1 - transparent' : 'D2 - full'")),
xOffset=alt.XOffset("Dataset:N"),
column=alt.Column('Layer:N'),
row=alt.Row("Model:N")
).resolve_scale(x='independent')

How to create textbox on figure using first row in geodataframe?

I am looking to plot a textbox on a figure displaying the 5-Day NHC forecast cone for a tropical cyclone, in this case Hurricane Dorian. I have the four shapefiles (track line, cone, points, and watches/warnings). On the figure I want to display the following from the first row of points_gdf (yellow circles in the image; the two commented-out lines near the bottom of the code are what I tried initially):
Latest Tracking Information: (regular string; below are variables from points_gdf)
LAT LON
MAXWIND
GUST
MSLP
TCSPD
# Load the four NHC shapefiles: track line, cone polygon, forecast points and
# watch/warning lines.
track_line_gdf = geopandas.read_file('nhc/al052019_5day_037/al052019-037_5day_lin.shp')
cone_gdf = geopandas.read_file('nhc/al052019_5day_037/al052019-037_5day_pgn.shp')
points_gdf = geopandas.read_file('nhc/al052019_5day_037/al052019-037_5day_pts.shp')
ww_gdf = geopandas.read_file('nhc/al052019_5day_037/al052019-037_ww_wwlin.shp')

fig = plt.figure(figsize=(14, 12))
fig.set_facecolor('white')
ax = plt.subplot(1, 1, 1, projection=map_crs)
ax.set_extent([-88, -70, 25, 50])

# Cone, track line, then the forecast points coloured by maximum wind.
ax.add_geometries(cone_gdf['geometry'], crs=data_crs, facecolor='white',
                  edgecolor='black', linewidth=0.25, alpha=0.4)
ax.add_geometries(track_line_gdf['geometry'], crs=data_crs, facecolor='none',
                  edgecolor='black', linewidth=2)
sc = ax.scatter(points_gdf['LON'], points_gdf['LAT'], transform=data_crs,
                zorder=10, c=points_gdf['MAXWIND'], cmap='jet')

ww_colors = {'Tropical Storm Watch': 'gold',
             'Hurricane Watch': 'pink',
             'Tropical Storm Warning': 'tab:blue',
             'Hurricane Warning': 'tab:red'}
# Draw each watch/warning category in its own colour.
for ww_type, ww_color in ww_colors.items():
    ww_subset = ww_gdf[ww_gdf['TCWW'] == ww_type]
    ax.add_geometries(ww_subset['geometry'], facecolor='none',
                      edgecolor=ww_color, crs=data_crs,
                      linewidth=5)
# Proxy markers so the legend can show one entry per watch/warning colour.
markers = [plt.Line2D([0, 0], [0, 0], color=color, marker='o', linestyle='') for color in ww_colors.values()]

# Storm metadata is read from the first watch/warning record.
Name = ww_gdf['STORMNAME'][0]
Storm = ww_gdf['STORMTYPE'][0]
AdvDate = ww_gdf['ADVDATE'][0]
AdvNum = ww_gdf['ADVISNUM'][0]
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
plt.colorbar(sc, label='Wind Speed (mph)')
plt.title(Storm + ' ' + Name + ' - ' + AdvDate + ' Advisory', fontsize=14, fontweight='bold')
plt.legend(markers, ww_colors.keys())
# Text box anchored near the top-left corner in axes coordinates.
plt.text(0.05, 0.95, 'Testing', transform=ax.transAxes, va='top', bbox=props)
It would help to know either what error you're running into, or what exactly isn't behaving how you want. I can slightly tweak your code to make this:
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import pandas as pd

fig = plt.figure(figsize=(14, 12))
fig.set_facecolor('white')
ax = plt.subplot(1, 1, 1, projection=ccrs.LambertConformal())
plt.title('Storm Advisory', fontsize=14, fontweight='bold')

# Stand-in for the first row of the forecast-points table.
points_gds = pd.DataFrame(dict(GUST=[165.0], LAT=[26.8],
                               LON=[-78.3], MSLP=[930.2]))
# Multi-line label built from the first row; the triple-quoted f-string
# carries the line breaks itself.
storminfo = f'''Max Wind Gusts: {points_gds.iloc[0]['GUST']:.0f} mph
Current Latitude: {points_gds.iloc[0]['LAT']:.1f}
Current Longitude: {points_gds.iloc[0]['LON']:.1f}
Central Pressure: {points_gds.iloc[0]['MSLP']:.2f} mb'''
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
# Draw the storm summary (not a placeholder) in the top-left corner of the
# axes, in axes coordinates.
plt.text(0.05, 0.95, storminfo, transform=ax.transAxes, va='top', bbox=props)
ax.coastlines()
ax.set_extent([-88, -70, 25, 50])
which produces this image:
To make that work I needed to change round (which is a Python built-in function) to the string 'round'. The text is formatted using f-strings ("formatted string literals"), and enclosed as a triple-quoted string to avoid needing to manually put in the newline ('\n') characters. Python's docs can tell you more about how to control the formatting of individual items.

error code : ValueError: x and y must have same first dimension, but have shapes (7,) and (1, 7)

I am trying to draw a plt graph. I keep getting the error that x and y do not have the same first dimension. I don't understand why. What am I doing wrong?
I have added more details on the code.
Thanks a lot!
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Column dtypes for the CSV: two text columns plus one float column per year.
dico_dtype = {'Country Code': str, 'Indicator Name': str}
# Yearly columns 1970-2017 followed by the five-year projections 2020-2100.
liste_annee = [str(annee) for annee in range(1970, 2018)] + [str(annee) for annee in range(2020, 2101, 5)]
# The np.float alias has been removed from NumPy; the builtin float is the
# same type.
for annee in liste_annee:
    dico_dtype[annee] = float
print(dico_dtype)
data = pd.read_csv('C:/Users/s.guerin/Desktop/OPC/projet edtech/StatsCSV/EdStatsData.csv', dtype=dico_dtype)
datanew = data.dropna(axis=0, thresh=25)
# Keep only the years that are plotted; every other year column is dropped.
annees_gardees = ['2009', '2010', '2011', '2012', '2013', '2014', '2015']
datadrop = data.drop([annee for annee in liste_annee if annee not in annees_gardees], axis=1)
internet = datadrop.loc[(datadrop['Indicator Name'] == "Internet users (per 100 people)"), :]
# NOTE(review): no 'moyenne' column is created in the visible code -
# presumably it is computed elsewhere; TODO confirm.
internet_ok = internet.loc[(internet['moyenne'] > 61), :]
internet_ok = internet_ok.sort_values(by='2009')
# evolution curve
plt.figure(10, figsize=(50, 20))
# Keep the returned Text objects so their font size can be set.
xtitre = plt.xlabel('Année')
xtitre.set_fontsize(200)
ytitre = plt.ylabel('%')
ytitre.set_fontsize(200)
titre = plt.title('Evolution par année des utilisateurs Internet en %')
titre.set_fontsize(80)
x = np.arange(2009, 2016)
liste_country = ['BHS', 'PRI', 'HRV', 'GUM', 'KWT', 'BHR', 'QAT', 'MAC', 'KNA', 'MYS']
for countrycode in liste_country:
    y = internet_ok[internet_ok['Country Code'] == countrycode]
    y = y[annees_gardees]
    # The selection is 2-D (rows x 7 years); flatten it to match x's shape.
    y = np.reshape(y.values, (7,))
    plt.plot(x, y, label=countrycode)
plt.grid(True)
plt.tick_params(axis='both', labelsize=50)
plt.legend(loc=1, prop={'size': 50})

How to annotate subplots in Plotly inside a for loop

I am trying to annotate my subplots inside a for loop. Each subplot will have RMS value printed on the plot. I tried to do it the following way:
from plotly import tools
figg = tools.make_subplots(rows=4, cols=1)
# Synthetic data: 200 samples, with ids 1..10 repeated cyclically in "Z".
# "Y" is the sine of the "X" samples (the original referenced an undefined x).
x_values = np.arange(1, 101, 0.5)
fake_date = {"X": x_values, "Y": np.sin(x_values), "Z": [x + 1 for x in range(10)] * 20}
fake_date = pd.DataFrame(fake_date)
fake_date.sort_values("Z")  # NOTE(review): result is discarded, so this has no effect
unique_ids = fake_date['Z'].unique()
# Random 60/40 split of the ids into train and test.
train_id, test_id = np.split(np.random.permutation(unique_ids), [int(.6 * len(unique_ids))])
for i, j in enumerate(test_id):
    x_test = fake_date[fake_date['Z'].isin([test_id[i]])]
    y_test = fake_date[fake_date['Z'].isin([test_id[i]])]
    # Evaluate
    rms_test = 0.04
    r_test = 0.9
    Real = {'type': 'scatter',
            'x': x_test.X,
            'y': x_test.Y,
            "mode": 'lines+markers',
            "name": 'Real'}
    figg.append_trace(Real, i + 1, 1)
    # Every iteration passes a one-element annotation list that points at the
    # first subplot's axes (x1 / y1).
    figg['layout'].update(annotations=[dict(x=10, y=0.2, text=rms_test, xref="x1", yref="y1")])
figg['layout'].update(height=1800, width=600, title='Testing')
pyo.iplot(figg)
This does not work, although the answer given here seems to work for others. Can anyone point out what I am doing wrong?
I generated fake data for reproducibility.
I am not sure where to exactly place the RMS value, but below is a sample code which will help you achieve what you want.
We create an array annotation_arr where we store the annotations using the for loop.
We need to set the xref and yref for each of the individual axes. Remember, the first axis is x and the second is x2, so I have written a conditional expression for that. Please check out the code below and let me know if there are any issues!
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
from plotly import tools
import numpy as np
import pandas as pd
init_notebook_mode(connected=True)
rows = 4
figg = tools.make_subplots(rows=rows, cols=1)
# Synthetic data: 200 samples, with ids 1..10 repeated cyclically in "Z".
fake_date = {"X": np.arange(0, 100, 0.5), "Y": [np.sin(x) for x in range(200)], "Z": [x + 1 for x in range(10)] * 20}
fake_date = pd.DataFrame(fake_date)
fake_date.sort_values("Z")  # NOTE(review): result is discarded, so this has no effect
unique_ids = fake_date['Z'].unique()
# Random 60/40 split of the ids into train and test.
train_id, test_id = np.split(np.random.permutation(unique_ids), [int(.6 * len(unique_ids))])
top = 0
# Annotations are collected for all subplots and applied once after the loop.
annotation_arr = []
for i, j in enumerate(test_id):
    x_test = fake_date[fake_date['Z'].isin([j])]
    # Evaluate
    rms_test = 0.04
    r_test = 0.9
    Real = {'type': 'scatter',
            'x': x_test.X,
            'y': x_test.Y,
            "mode": 'lines+markers',
            "name": 'Real'}
    top = top + 1 / rows
    # The first subplot's axes are "x"/"y"; the following ones are "x2"/"y2",
    # "x3"/"y3", ...
    i_val = "" if i == 0 else i + 1
    annotation_arr.append(dict(x=r_test, y=top, text=rms_test, xref=f"x{i_val}", yref=f"y{i_val}"))
    figg.append_trace(Real, i + 1, 1)
figg['layout'].update(annotations=annotation_arr)
figg['layout'].update(height=1800, width=600, title='Testing')
iplot(figg)