Plotly not loading chart correctly where matplotlib does - pandas

When using plotly, I get this picture with some straight lines across the graph. I do not get the same result when using matplotlib.
import pandas as pd
import numpy as np
import cufflinks as cf
from plotly.offline import plot
from datetime import datetime
import io
import requests
# Sample frame from cufflinks; it is overwritten by the downloaded data below.
df = cf.datagen.lines()
# CSV candle endpoint of iss.moex.com for RTSI. The query string sets
# iss.reverse=true, so rows presumably arrive newest-first -- TODO confirm.
# NOTE(review): the literal ends with a stray "'" just before the closing quote.
src = "https://iss.moex.com/iss/engines/stock/markets/index/securities/RTSI/candles.csv?iss.only=history&interval=31&iss.reverse=true&from=1995-09-01&till=2019-12-28&iss.json=extended&callback=JSON_CALLBACK&lang=en&limit=100&start=0&sort_order=TRADEDATE&sort_order_desc=desc&_=1563185736134'"
r = requests.get(src)
# Parse the ';'-separated response body with explicit column names;
# .iloc[2:] discards the first two parsed rows (presumably non-data
# header lines -- verify against the actual response).
df = pd.read_csv(io.StringIO(r.content.decode('utf-8')),
sep=';',
names=[
'Open', 'Close', 'High', 'Low', 'Value',
'Volume', 'Date', 'End'
]).iloc[2:]
# Rebuild the frame with explicit dtypes: 'Date' as datetime64, the other
# kept columns as float64. 'End' is not carried over.
frame = {
'Date': df['Date'].astype(np.datetime64),
'Open': df['Open'].astype('float64'),
'Close': df['Close'].astype('float64'),
'High': df['High'].astype('float64'),
'Low': df['Low'].astype('float64'),
'Value': df['Value'].astype('float64'),
'Volume': df['Volume'].astype('float64'),
}
df = pd.DataFrame(frame)
# One trace per column after 'Date'. Rows are plotted in the order they were
# read; nothing above sorts by 'Date'.
plot([{
'x': df['Date'],
'y': df[col],
'name': col
} for col in df.columns[1:]])
# NOTE(review): presumably the DataFrame.iplot method added by cufflinks -- confirm.
df.iplot()
Is this a bug in plotly, or am I doing something wrong?

The principal problem is that the date values don't appear in sorted order. It is necessary to sort them explicitly.
Another problem is that the numbers in the 'Value' column have a completely different range from the others. To display them in the same plot, one could add a secondary y-axis.
As the 'Volume' column isn't filled in (containing only zeros), it can be left out.
Here is some sample code, skipping conversion steps which probably are unnecessary for the latest plotly versions:
import plotly.graph_objects as go
import pandas as pd
src = "https://...."  # placeholder: use the CSV URL from the question

# Read the ';'-separated file with explicit column names. As in the question,
# .iloc[2:] drops the first two parsed rows.
df = pd.read_csv(src,
                 sep=';',
                 names=[
                     'Open', 'Close', 'High', 'Low', 'Value',
                     'Volume', 'Date', 'End'
                 ]).iloc[2:]

# A line trace connects points in row order, so the rows must be in
# chronological order; otherwise the line jumps back and forth across the plot.
df = df.sort_values(by='Date')

# Print each column's range; this is what shows that 'Value' lives on a very
# different scale from the price columns.
for col in df.columns:
    print(col, df[col].min(), df[col].max())

fig = go.Figure()
for col in ['Open', 'Close', 'High', 'Low', 'Value']:
    fig.add_trace(
        go.Scatter(
            x=df['Date'],
            y=df[col],
            name=col,
            # Prices share the primary (left) axis; 'Value' gets the
            # secondary (right) axis so that each axis title matches the
            # data actually drawn against it.
            yaxis='y2' if col == 'Value' else 'y',
        ))

fig.update_layout(
    yaxis=dict(
        title="Price",
    ),
    yaxis2=dict(
        title="Value",
        anchor="x",
        overlaying="y",  # draw on top of the primary axis instead of beside it
        side="right",
    ))
fig.show()

I see several small problems, plus the data is not sorted, as stated by @JohanC. You should also really use two y-axes, as suggested by @JohanC. Here is my full code with comments:
import pandas as pd
import io
import requests
import plotly.graph_objects as go
# get Data
# (the stray "'" that trailed the last query parameter in the question's URL
# has been removed)
src = "https://iss.moex.com/iss/engines/stock/markets/index/securities/RTSI/candles.csv?iss.only=history&interval=31&iss.reverse=true&from=1995-09-01&till=2019-12-28&iss.json=extended&callback=JSON_CALLBACK&lang=en&limit=100&start=0&sort_order=TRADEDATE&sort_order_desc=desc&_=1563185736134"
r = requests.get(src)
# Fail loudly on an HTTP error instead of trying to parse an error page as CSV.
r.raise_for_status()
df = pd.read_csv(io.StringIO(r.content.decode('utf-8')),
                 sep=';',
                 names=[
                     'Open', 'Close', 'High', 'Low', 'Value',
                     'Volume', 'Date', 'End'
                 ]).iloc[2:]

# set Date as first column and drop End
df = df.set_index('Date')\
       .drop("End", axis=1)\
       .reset_index()

# change dtypes: Date becomes a datetime column, everything else float
df["Date"] = df["Date"].astype("M8[us]")
for col in df.columns[1:]:
    df[col] = df[col].astype(float)

# sort Date so that each line trace is drawn in chronological order
df = df.sort_values("Date")\
       .reset_index(drop=True)

# one line trace per column after Date
fig = go.Figure()
for col in df.columns[1:]:
    fig.add_trace(
        go.Scatter(x=df["Date"],
                   y=df[col],
                   name=col))
fig.show()

Related

Python-Plotly grouped bar chart with the five highest values from five pandas.dataframe columns

A radiation dose simulation gave the [following organ doses:][1]
I would like to use Python - Plotly to make a grouped bar chart with the five highest organ dose values from each age group columns '0', '1', '5', '10', and '15'.
The x-axis should show those five organs from each age column that gets the highest dose.
I have started with finding the five largest for the '0' age group and make a bar graph:
import pandas as pd
import plotly.express as px  # was misspelled as "plotl.express"

df = pd.read_csv('data_org_dos.csv')
# Five largest values of the '0' column, indexed by the 'Organ' column.
data = df.set_index('Organ')['0'].nlargest(5)
fig = px.bar(data)
fig.show()
How do I make a grouped bar graph with all the ages '0', '1', '5', '10' and '15' presented in the same graph?
Following @Timeless's answer, I could change the code to produce the grouped bar chart I was aiming for:
import pandas as pd
import plotly.graph_objs as go
df = pd.read_csv("data_org_dos.csv")

# Every column after the first is treated as an age group; index by organ
# once so the top-5 lookup inside the loop is a plain column selection.
age_columns = df.columns[1:]
doses = df.set_index('Organ')

fig = go.Figure()
for age in age_columns:
    # Five largest values for this age group, keyed by organ name.
    top5 = doses[age].nlargest(5)
    # Use the column name directly instead of rebuilding it by splitting the
    # "Age: <col>" label, which would break for a name containing a space.
    fig.add_trace(go.Bar(x=top5.index, y=top5.values, name="Age: " + age))

fig.update_xaxes(title_text='Organ')
fig.update_yaxes(title_text='E [mSv]')
fig.update_layout(
    font=dict(
        size=18
    )
)
fig.show()
[see image link][2]
[1]: https://www.dropbox.com/s/qux9v9ubp5cskl6/data_org_dos.csv?dl=0
[2]: https://i.stack.imgur.com/iiEAt.png
You need a combo of nlargest and go.Bar :
import pandas as pd
import plotly.subplots as sp
import plotly.graph_objs as go
df = pd.read_csv("Downloads/data_org_dos.csv")

# Every column after the first is treated as an age group; each one gets its
# own subplot row.
age_columns = df.columns[1:]
doses = df.set_index('Organ')

fig = sp.make_subplots(rows=len(age_columns), cols=1)
# Subplot rows are 1-based, hence start=1.
for row, age in enumerate(age_columns, start=1):
    # Five largest values for this age group, keyed by organ name.
    top5 = doses[age].nlargest(5)
    fig.add_trace(
        go.Bar(x=top5.index, y=top5.values, name="Age: " + age),
        row=row, col=1)

fig.update_layout(title="StackOverflow: OrganDoses", barmode="group")
fig.show()
Output :

How to create percentage stacked bar chart in plotly?

I have this code but want to create a stacked % bar chart. How would I do that?
fig = px.bar(df, x='date', y=['var1','var2','var3', 'var4'],barmode='stack')
fig.show()
I think you should add a new column that holds each variable's percentage of the total for its date, and then use that column for the chart. Please refer to my code below:
import pandas as pd
import numpy as np
from datetime import datetime as dt
import plotly.express as px
# Wide sample data: one row per date, one column per variable.
df = pd.DataFrame({
    'date': ['2022-01-07', '2022-02-07', '2022-03-07', '2022-04-07',
             '2022-05-07', '2022-06-07', '2022-07-07', '2022-08-07'],
    'var1': [5, 7, 2, 4, 6, 8, 10, 9],
    'var2': [6, 7, 8, 5, 2, 6, 3, 1],
    'var3': [8, 5, 6, 2, 8, 3, 5, 4],
    'var4': [7, 9, 7, 5, 3, 4, 2, 1],
})

# Reshape to long form: one row per (date, variable) pair.
value_columns = df.columns[1:]
df_melt = df.melt(
    id_vars=['date'],
    value_vars=value_columns,
    var_name='var',
    value_name='Amount',
    ignore_index=True,
)

# Express each amount as a percentage of the total for its date.
date_totals = df_melt.groupby('date')['Amount'].transform('sum')
df_melt['%'] = 100 * df_melt['Amount'] / date_totals

fig = px.bar(
    df_melt,
    x="date",
    y="%",
    color='var',
    title="Bar Plot",
    template="plotly_white",
)
fig.update_layout(barmode="relative", plot_bgcolor='white')
# Same axis styling on both axes: no axis line, no grid.
for update_axes in (fig.update_yaxes, fig.update_xaxes):
    update_axes(showline=False, showgrid=False)
fig.show()
And here is the Output:

matplotlib - plot merged dataframe with group bar

I am trying to plot a grouped bar chart from a merged dataframe. With the code below the bars are drawn on top of each other; how can I put them side by side, as in a grouped bar chart?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(0)
df1 = pd.DataFrame({
'key': ['A', 'B', 'C', 'D'],
'value':[ 10 ,6, 6, 8]})
df2 = pd.DataFrame({
'key': ['B', 'D', 'A', 'F'],
'value':[ 3, 5, 5, 7]})
df3 = pd.merge(df1, df2, how='inner', on=['key'])
print(df1)
print(df2)
print(df3)
fig, ax = plt.subplots(figsize=(12, 8))
b1 = ax.bar(df3['key'],df3['value_x'])
b2 = ax.bar(df3['key'],df3['value_y'])
pngname = "demo.png"
fig.savefig(pngname, dpi=fig.dpi)
print("[[./%s]]"%(pngname))
Current output:
The problem is that the x-axis data is the same for both calls; in your case the x values aren't numbers, they are the keys: "A", "B", "C". So matplotlib draws the bars one on top of another.
There's a simple way around it, as some tutorials online show https://www.geeksforgeeks.org/create-a-grouped-bar-plot-in-matplotlib/.
So, what you do is basically enumerate the keys, i.e. A=1, B=2, C=3. After this, choose your desired bar width, I chose 0.4 for example. And now, shift one group of bars to the left by bar_width/2, and shift the other one to the right by bar_width/2.
Perhaps the code explains it better than I did:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(0)

df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'],
                    'value': [10, 6, 6, 8]})
df2 = pd.DataFrame({'key': ['B', 'D', 'A', 'F'],
                    'value': [3, 5, 5, 7]})
# The inner merge keeps only keys present in both frames; the two 'value'
# columns come out as 'value_x' and 'value_y'.
df3 = pd.merge(df1, df2, how='inner', on=['key'])

fig, ax = plt.subplots(figsize=(12, 8))

# Put each key at an integer position, then offset the two series by half a
# bar width in opposite directions so they sit side by side.
x = np.arange(len(df3['key']))
bar_width = 0.4
half = bar_width / 2
b1 = ax.bar(x - half, df3['value_x'], width=bar_width, label='value_x')
b2 = ax.bar(x + half, df3['value_y'], width=bar_width, label='value_y')

# Label the integer positions with the original keys.
ax.set_xticks(x)
ax.set_xticklabels(df3['key'])
ax.legend(['value_x', 'value_y'])
plt.show()
Result:

Error while adding error bars to subplots in seaborn

I have the following example code which I want to plot as bar subplots using seaborn in one figure. I can plot the actual data as bar plots but when i try to add error bars, i get the following error:
AttributeError: 'NoneType' object has no attribute 'seq'
code is:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
df1 = pd.DataFrame({
'A': ['7.5'],
'B': ['2.4']
})
df1_err = pd.DataFrame({
'A': ['2.3'],
'B': ['1.2']
})
df2 = pd.DataFrame({
'A': ['5.5'],
'B': ['4.2']
})
df2_err = pd.DataFrame({
'A': ['1.7'],
'B': ['2.1']
})
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(6, 4), sharey=True)
my_pal = {"A": "green", "B":"orange"}
sns.set_style("whitegrid")
plt.tight_layout()
sns.barplot(data=df1, palette=my_pal, yerr = df1_err, linewidth=2,edgecolor=[".1","0.1"], ax=axes[0])
sns.barplot(data=df2, palette=my_pal, yerr = df2_err, linewidth=2,edgecolor=[".1","0.1"], ax=axes[1])
plt.show()
If I remove yerr from the sns.barplot() commands, it does create bar plots as I want, but I could not manage to add pre-calculated error bars to these subplots. Any help please?
Maybe you mean something like this:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# The question stores every number as a string, so each frame is converted
# to float before plotting.
df1 = pd.DataFrame({
    'A': ['7.5'],
    'B': ['2.4']
}).astype(float)
df1_err = pd.DataFrame({
    'A': ['2.3'],
    'B': ['1.2']
}).astype(float)
df2 = pd.DataFrame({
    'A': ['5.5'],
    'B': ['4.2']
}).astype(float)
df2_err = pd.DataFrame({
    'A': ['1.7'],
    'B': ['2.1']
}).astype(float)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(6, 4), sharey=True)

# One subplot per (values, errors) pair, so neither panel is left empty.
# Each frame has a single row: the column labels are the bar positions and
# the row holds the bar heights / error sizes.
for ax, values, errors in zip(axes, (df1, df2), (df1_err, df2_err)):
    ax.bar(values.columns.to_numpy(),
           values.iloc[0].to_numpy(),
           yerr=errors.iloc[0].to_numpy())
plt.show()

Pandas bar plot -- specify bar color by column

Is there a simple way to specify bar colors by column name using the Pandas DataFrame.plot(kind='bar') method?
I have a script that generates multiple DataFrames from several different data files in a directory. For example it does something like this:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pds
data_files = ['a', 'b', 'c', 'd']
df1 = pds.DataFrame(np.random.rand(4,3), columns=data_files[:-1])
df2 = pds.DataFrame(np.random.rand(4,3), columns=data_files[1:])
df1.plot(kind='bar', ax=plt.subplot(121))
df2.plot(kind='bar', ax=plt.subplot(122))
plt.show()
With the following output:
Unfortunately, the column colors aren't consistent for each label in the different plots. Is it possible to pass in a dictionary of (filenames:colors), so that any particular column always has the same color. For example, I could imagine creating this by zipping up the filenames with the Matplotlib color_cycle:
data_files = ['a', 'b', 'c', 'd']
colors = plt.rcParams['axes.color_cycle']
print zip(data_files, colors)
[('a', u'b'), ('b', u'g'), ('c', u'r'), ('d', u'c')]
I could figure out how to do this directly with Matplotlib: I just thought there might be a simpler, built-in solution.
Edit:
Below is a partial solution that works in pure Matplotlib. However, I'm using this in an IPython notebook that will be distributed to non-programmer colleagues, and I'd like to minimize the amount of excessive plotting code.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pds
data_files = ['a', 'b', 'c', 'd']
# rcParams['axes.color_cycle'] no longer exists in Matplotlib >= 2.0; the
# default colors are read from 'axes.prop_cycle' instead.
mpl_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Fixed label -> color mapping shared by every plot.
colors = dict(zip(data_files, mpl_colors))


def bar_plotter(df, colors, sub):
    """Draw the columns of ``df`` as grouped bars in subplot ``120 + sub``.

    Parameters
    ----------
    df : DataFrame
        One bar group per row, one bar per column. The index values are used
        as x positions.
    colors : dict
        Maps each column label of ``df`` to the color of its bars.
    sub : int
        Added to 120 to form the ``plt.subplot`` position code.
    """
    ncols = df.shape[1]
    # Leave two bar-widths of empty space between neighbouring groups.
    width = 1./(ncols+2.)
    # Left edge of the first bar, so that the group is centred on the index.
    starts = df.index.values - width*ncols/2.
    plt.subplot(120+sub)
    for n, col in enumerate(df):
        # The offsets above are left edges, so request edge alignment
        # explicitly instead of relying on the default.
        plt.bar(starts + width*n, df[col].values, color=colors[col],
                width=width, label=col, align='edge')
    plt.xticks(df.index.values)
    plt.grid()
    plt.legend()


df1 = pds.DataFrame(np.random.rand(4,3), columns=data_files[:-1])
df2 = pds.DataFrame(np.random.rand(4,3), columns=data_files[1:])
bar_plotter(df1, colors, 1)
bar_plotter(df2, colors, 2)
plt.show()
You can pass a list as the colors. This will require a little bit of manual work to get it to line up, unlike if you could pass a dictionary, but may be a less cluttered way to accomplish your goal.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pds
data_files = ['a', 'b', 'c', 'd']
# First frame uses labels a-c, second uses labels b-d.
df1 = pds.DataFrame(np.random.rand(4, 3), columns=data_files[:-1])
df2 = pds.DataFrame(np.random.rand(4, 3), columns=data_files[1:])

# Colors listed in the same order as data_files.
color_list = ['b', 'g', 'r', 'c']

left_ax = plt.subplot(121)
df1.plot(ax=left_ax, kind='bar', color=color_list)

# df2 starts at label 'b', so its color list starts one entry later.
right_ax = plt.subplot(122)
df2.plot(ax=right_ax, kind='bar', color=color_list[1:])

plt.show()
EDIT
Ajean came up with a simple way to return a list of the correct colors from a dictionary:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pds
data_files = ['a', 'b', 'c', 'd']
color_list = ['b', 'g', 'r', 'c']
# Fixed label -> color mapping shared by both plots.
d2c = dict(zip(data_files, color_list))

df1 = pds.DataFrame(np.random.rand(4,3), columns=data_files[:-1])
df2 = pds.DataFrame(np.random.rand(4,3), columns=data_files[1:])

# Build concrete lists of colors, one per column in column order. On
# Python 3, map() returns a lazy iterator rather than a list, so a list
# comprehension is used to hand the plot call a real sequence.
df1.plot(kind='bar', ax=plt.subplot(121), color=[d2c[c] for c in df1.columns])
df2.plot(kind='bar', ax=plt.subplot(122), color=[d2c[c] for c in df2.columns])
plt.show()
Pandas version 1.1.0 makes this easier. You can pass a dictionary to specify different color for each column in the pandas.DataFrame.plot.bar() function:
Here is an example:
# Imports are included so the example runs on its own.
import matplotlib.pyplot as plt
import pandas as pd

df1 = pd.DataFrame({'a': [1.2, .8, .9], 'b': [.2, .9, .7]})
df2 = pd.DataFrame({'b': [0.2, .5, .4], 'c': [.5, .6, .7], 'd': [1.1, .6, .7]})

# One mapping from column label to color, reused for every frame so a given
# column is drawn in the same color in each plot.
color_dict = {'a': 'green', 'b': 'red', 'c': 'blue', 'd': 'cyan'}
df1.plot.bar(color=color_dict)
df2.plot.bar(color=color_dict)
plt.show()