I have the following dataframe:
import pandas as pd
# Long-format sample: one row per (country, year) pair with that country's share.
data = pd.DataFrame({
    'country': ['US', 'DE', 'IT'] * 3,
    'year': [2000] * 3 + [2001] * 3 + [2002] * 3,
    'share': [0.5, 0.3, 0.2, 0.6, 0.1, 0.3, 0.4, 0.2, 0.4],
})
I want to display the data with a stacked bar chart.
X-axis: year,
Y-axis: share,
Color: country
All three bars (for 2000, 2001 and 2002) should have the same height, because for each year the shares sum to 1.
You can use a pivot and plot.bar with stacked=True:
# Reshape to wide form (one row per year, one column per country) and stack the
# country columns in a single bar per year. The arguments are passed by keyword
# because DataFrame.pivot no longer accepts them positionally in pandas 2.0+.
data.pivot(index='year', columns='country', values='share').plot.bar(stacked=True)
output:
Related
I have a dataset like below.
| T/F | Value | category |
|-----|-------|----------|
| T   | 1     | A        |
| F   | 3     | B        |
| T   | 5     | C        |
| F   | 7     | A        |
| T   | 8     | B        |
| ... | ...   | ...      |
So, I want to draw a bar chart like the one below.
The same category should keep the same position, the bars should be centered on zero, and the F values should be drawn as bars below the horizontal line while the T values are drawn as bars above it.
How can I make this chart with matplotlib.pyplot or another library?
I need an example.
One approach involves making the False values negative, and then creating a Seaborn barplot with T/F as hue. You might want to make a copy of the data if you can't change the original.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Example records: a T/F flag, a magnitude, and the category each row belongs to.
records = {
    'T/F': ['T', 'F', 'T', 'F', 'T'],
    'Value': [1, 3, 5, 7, 8],
    'category': ['A', 'B', 'C', 'A', 'B'],
}
data = pd.DataFrame(records)

# Keep the 'T' values as they are and negate all others, so that the 'F' bars
# extend below the zero line. This overwrites the 'Value' column.
is_true = data['T/F'] == 'T'
signed_values = np.where(is_true, data['Value'], -data['Value'])
data['Value'] = signed_values

# dodge=False is passed so the hue levels are not shifted apart within a category.
ax = sns.barplot(x='category', y='Value', hue='T/F', data=data,
                 palette='turbo', dodge=False)
ax.axhline(y=0, color='black', linewidth=2)  # zero baseline
plt.tight_layout()
plt.show()
I have a pandas dataset with a toy version that can be created with this
# Toy dataset: every Series is one chart row, laid out as
# (label1, value1, label2, value2); rows without a second entry use '' and 0.
s1 = pd.Series(['dont have a mortgage', -31.8, 'have mortgage', 15.65])
s2 = pd.Series(['have utility bill arrears', -21.45, '', 0])
s3 = pd.Series(['have interest only mortgage', -19.59, '', 0])
s4 = pd.Series(['bank with challenger bank', -19.24, 'bank with a traditional bank', 32.71])

column_names = ['label1', 'value1', 'label2', 'value2']
df = pd.DataFrame([list(series) for series in (s1, s2, s3, s4)], columns=column_names)
I want to create a bar chart that looks like this version I hacked together in excel
I want to be able to supply RGB values to customise the two colours for the left and right bars (currently blue and orange)
I tried different versions using "fig.add_trace(go.Bar", but I am brand new to plotly and can't get anything to work with different coloured bars on one row and an annotation under each bar.
All help greatly appreciated!
thanks
To create a double-sided bar chart, you can create two subplots with shared x- and y-axes. Each subplot is a horizontal bar chart with a specified marker color:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# define data set: every row is (label1, value1, label2, value2); value1 feeds
# the left panel and value2 the right panel
s1 = pd.Series(['dont have a mortgage', -31.8, 'have mortgage', 15.65])
s2 = pd.Series(['have utility bill arrears', -21.45, '', 0])
s3 = pd.Series(['have interest only mortgage', -19.59, '', 0])
s4 = pd.Series(['bank with challenger bank', -19.24, 'bank with a traditional bank', 32.71])
df = pd.DataFrame([list(s1), list(s2), list(s3), list(s4)],
                  columns=['label1', 'value1', 'label2', 'value2'])

# create subplots: two panels in one row with no gap between them and a shared
# y-axis, so the bars of one data row line up across both panels
fig = make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=True,
                    shared_yaxes=True, horizontal_spacing=0)

# add_trace(..., row=, col=) is used instead of the deprecated append_trace
left_bars = go.Bar(y=df.index, x=df.value1, orientation='h', width=0.4,
                   showlegend=False, marker_color='#4472c4')
right_bars = go.Bar(y=df.index, x=df.value2, orientation='h', width=0.4,
                    showlegend=False, marker_color='#ed7d31')
fig.add_trace(left_bars, row=1, col=1)
fig.add_trace(right_bars, row=1, col=2)

fig.update_yaxes(showticklabels=False)  # hide all yticks
The annotations need to be added separately:
def _text_annotation(axis, x, y, text, xanchor):
    """Return an arrow-less annotation dict bound to subplot axes number *axis*.

    axis    -- 1 or 2; selects the 'x1'/'y1' or 'x2'/'y2' axis references
    x, y    -- position of the text in that subplot's data coordinates
    text    -- content to display
    xanchor -- which side of the text is pinned to x ('left' or 'right')
    """
    return {
        'xref': 'x{}'.format(axis),
        'yref': 'y{}'.format(axis),
        'y': y,
        'x': x,
        'text': text,
        'xanchor': xanchor,
        'showarrow': False,
    }


annotations = []
for i, row in df.iterrows():
    # Left panel: the value at the end of the bar, the label 0.3 below the bar
    # position and anchored at x = -1. Rows with an empty label are skipped.
    if row.label1 != '':
        annotations.append(_text_annotation(1, row.value1, i, row.value1, 'right'))
        annotations.append(_text_annotation(1, -1, i - 0.3, row.label1, 'right'))
    # Right panel: same layout mirrored (anchored on the left, label at x = 1).
    if row.label2 != '':
        annotations.append(_text_annotation(2, row.value2, i, row.value2, 'left'))
        annotations.append(_text_annotation(2, 1, i - 0.3, row.label2, 'left'))

fig.update_layout(annotations=annotations)
fig.show()
I'm trying to sort the graph with different colors in the same order as the dataframe, but when I sort the values, the colors don't change.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plot
# Per-asset change values, asset names, and the bar color intended for each asset.
changelist = (0.1, 0.12, 0.13, -0.1, 0.05, 0.07)
assetlist = ('a', 'b', 'c', 'd', 'e', 'f')
clrs = ('yellow', 'green', 'blue', 'blue', 'green', 'yellow')
data = {"Assets":assetlist,
"Change":changelist,
"Colors":clrs,
}
dataFrame = pd.DataFrame(data=data)
# NOTE: the result of sort_values is discarded and inplace is not set, so
# dataFrame is not reordered by this statement.
dataFrame.sort_values("Change", ascending=False)
# NOTE: the colors come from the standalone clrs tuple (original order), not
# from the "Colors" column of the frame.
dataFrame.plot.bar(x="Assets", y="Change", rot=90, title="Desempeño Principales Activos Enero en MXN", color=clrs)
plot.show(block=True)
You need to use inplace=True to have the sorting act on the dataframe itself. Otherwise, the function returns the sorted dataframe without changing the original.
Also, you need to give the column from the sorted dataframe as the list of colors, not the original unsorted color list.
(Note that in Python strings need either single or double quotes, and commands aren't ended with a semicolon.)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plot
# One entry per asset: its name, its change, and the color meant for its bar.
assetlist = ('a', 'b', 'c', 'd', 'e', 'f')
changelist = (0.1, 0.12, 0.13, -0.1, 0.05, 0.07)
clrs = ('yellow', 'green', 'blue', 'blue', 'green', 'yellow')
data = {"Assets": assetlist, "Change": changelist, "Colors": clrs}
dataFrame = pd.DataFrame(data=data)

# inplace=True reorders dataFrame itself, so all three columns (the colors
# included) end up in descending order of "Change".
dataFrame.sort_values(by="Change", ascending=False, inplace=True)

# Take the colors from the sorted frame so they follow the same order as the rows.
sorted_colors = dataFrame["Colors"]
dataFrame.plot.bar(
    x="Assets",
    y="Change",
    rot=90,
    title="Desempeño Principales Activos Enero en MXN",
    color=sorted_colors,
)
plot.show(block=True)
I am trying to plot multiple lines with their corresponding legend:
# Region names; each one is looked up in raw_miles_df via .loc in the loop below.
regions = ['Wales', 'Scotland', 'London', 'East of England', 'East Midlands',
'Yorkshire and The Humber', 'South East', 'South West',
'West Midlands', 'North West', 'North East']
plt.figure(figsize = (10,8))
plt.title('Number of Vehicles per Region')
plt.xlabel('Year')
plt.ylabel('Number of Vehicles')
# NOTE: this legend() call runs before any line has been plotted.
plt.legend()
# NOTE(review): indentation was lost in this snippet; presumably every line
# below, through plt.legend(i), belongs to the loop body -- confirm.
for i in regions:
# Sum across the columns of this region's rows, then move the index into columns.
region = raw_miles_df.loc[i].sum(axis = 1).reset_index()
region = region.rename(columns = {'count_date':'Year', 0: 'vehicles'})
# Replace each entry of 'Year' with its .year attribute.
region['Year'] = region['Year'].apply(lambda x: x.year)
# Mean of 'vehicles' per year, rounded to two decimals.
region = region.groupby(['Year']).agg(vehicles = ('vehicles', lambda x: x.mean().round(2)))
plt.plot(region)
# NOTE: i is a single region name (one string), not the list of all names.
plt.legend(i)
The method I have is not working:
You need to move plt.legend out of the loop and make it plt.legend(regions). As you can see in the legend, it is treating the string 'North East', which is the last item in regions, as an iterable from which to draw the categories.
But you can make it easier on yourself by using seaborn:
import seaborn as sns
# aggregate your data outside of the loop
# (the ... is a placeholder for your own grouping keys and aggregation)
# then call lineplot; x_column, y_column and category_column are placeholders
# for the column names in aggdata, and hue is what separates the categories
aggdata = df.groupby(...)
sns.lineplot(x=x_column, y=y_column, hue=category_column, data=aggdata)
I am trying to plot a matplotlib graph based on the value chosen from a dropdown. I have made the dropdown, and the plots for the values are also ready, but I don't know how to connect the two.
Following is the code of the dropdown:
# (label, value) pairs for the dropdown, in display order.
feature_choices = [
    ('Chest Pain', 'cp'),
    ('Resting Blood Pressure', 'trestbps'),
    ('Serum Cholestrol in mg/dl', 'chol'),
    ('Fasting Blood Pressure', 'fbs'),
    ('Resting electrocardiographic results', 'restecg'),
    ('Maximum heart rate achieved', 'thalach'),
    ('Exercise induced angina', 'exang'),
    ('Old Peak', 'oldpeak'),
    ('Slope of the peak exercise ST segment', 'slope'),
    ('Number of major vessels (0-3) colored by flourosopy', 'ca'),
    ('Thalassemia', 'thal'),
]

# Page layout: a single Div that contains only the dropdown.
app.layout = html.Div([
    dcc.Dropdown(
        id='first-dropdown',
        options=[{'label': label, 'value': value} for label, value in feature_choices],
        value='thalach',
    )
])
For each value in the dropdown I have a separate function which returns a plot. For example:
If the label 'Maximum heart rate achieved' (whose value is 'thalach') is selected from the dropdown, I have a function called plotThalach which returns a plot like this:
def plotThalach(df):
    """Draw 'thalach' against 'age' as two bar series, one for men and one for women.

    Args:
        df: table with 'sex', 'age' and 'thalach' columns. Rows with
            sex == 1.0 are drawn as 'Men', rows with sex == 0.0 as 'Women'.

    Returns:
        The ``plt`` object the figure was drawn with (presumably the
        matplotlib.pyplot module; its import is not shown here).
    """
    df_men = df[df['sex'] == 1.0]
    df_women = df[df['sex'] == 0.0]

    plt.figure(figsize=(20, 8))
    # The women's bars are offset by one bar width (0.25) so that the two
    # series sit next to each other instead of overlapping.
    plt.bar(df_men['age'] + 0.00, df_men['thalach'], color='b', width=0.25, label='Men')
    plt.bar(df_women['age'] + 0.25, df_women['thalach'], color='r', width=0.25, label='Women')
    plt.legend(loc='upper right')
    plt.xlabel("Age")
    plt.ylabel("Max Heart Rate")
    plt.title("Age vs Max Heart Rate")
    return plt
Now, how do I connect the two so that when a value is selected from the dropdown, my function gets called and the plot is displayed on the screen?
It's not clear why you want to mix plotly-dash and matplotlib; you can easily do this using just plotly-dash.
Here is some sample code:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.graph_objs as go
# Load the gapminder five-year CSV from the plotly datasets repository.
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/'
    'datasets/master/gapminderDataFiveYear.csv'
)

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# drop down with a default value set: one option per distinct year, with the
# smallest year as the initial value
year_dropdown = dcc.Dropdown(
    id='xaxis-column',
    options=[{'label': str(year), 'value': year} for year in df['year'].unique()],
    value=df['year'].min(),
)

# graph that is to be updated
year_graph = dcc.Graph(id='graph-with-slider')

# main div
app.layout = html.Div([year_dropdown, year_graph])
# callback which will be spawned when the input changes, in this case the input is the dropdown value
@app.callback(
    Output('graph-with-slider', 'figure'),
    [Input('xaxis-column', 'value')])
def update_figure(selected_year):
    """Build the figure for one year, with one marker trace per continent.

    Args:
        selected_year: value of the 'xaxis-column' dropdown; rows of ``df``
            whose ``year`` equals it are plotted.

    Returns:
        A dict with 'data' (the list of scatter traces) and 'layout', which
        is assigned to the 'figure' property of 'graph-with-slider'.
    """
    filtered_df = df[df.year == selected_year]
    traces = []
    for i in filtered_df.continent.unique():
        df_by_continent = filtered_df[filtered_df['continent'] == i]
        traces.append(go.Scatter(
            x=df_by_continent['gdpPercap'],
            y=df_by_continent['lifeExp'],
            text=df_by_continent['country'],
            mode='markers',
            opacity=0.7,
            marker={
                'size': 15,
                'line': {'width': 0.5, 'color': 'white'}
            },
            name=i
        ))
    return {
        'data': traces,
        'layout': go.Layout(
            xaxis={'type': 'log', 'title': 'GDP Per Capita'},
            yaxis={'title': 'Life Expectancy', 'range': [20, 90]},
            margin={'l': 40, 'b': 40, 't': 10, 'r': 10},
            legend={'x': 0, 'y': 1},
            hovermode='closest'
        )
    }
# Start the Dash server (in debug mode) only when this file is run as a script.
if __name__ == '__main__':
    app.run_server(debug=True)
But if you want to show the matplotlib graph instead of a plotly-dash graph, you can refer to the "Incorporating Matplotlib Plots" section here.