Plot rows of df on Plotly - dataframe

The df is like this:
X Y Label
0 [16, 37, 38] [7968, 4650, 3615] 0.7
1 [29, 37, 12] [4321, 4650, 1223] 0.8
2 [12, 2, 445] [1264, 3456, 2112] 0.9
This should plot three lines on the same plot with labels as continuous variables. What is the fastest & simplest way to plot it using plotly?

Taking This should plot three lines on the same plot as the requirement. (Which is inconsistent with where I want subplots from each row of the df)
Simple case of create a trace for each row, using https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.explode.html to prepare x and y
import pandas as pd
import plotly.graph_objects as go
df = pd.DataFrame(
{
"X": [[16, 37, 38], [29, 37, 12], [12, 2, 445]],
"Y": [[7968, 4650, 3615], [4321, 4650, 1223], [1264, 3456, 2112]],
"Label": [0.7, 0.8, 0.9],
}
)
go.Figure(
[
go.Scatter(
x=r["X"].explode(), y=r["Y"].explode(), name=str(r["Label"].values[0])
)
for _, r in df.groupby(df.index)
]
)
with continuous color defined by label
import pandas as pd
import plotly.graph_objects as go
from plotly.colors import sample_colorscale
import plotly.express as px
df = pd.DataFrame(
{
"X": [[16, 37, 38], [29, 37, 12], [12, 2, 445]],
"Y": [[7968, 4650, 3615], [4321, 4650, 1223], [1264, 3456, 2112]],
"Label": [0.1, 0.5, 0.9],
}
)
fig = px.scatter(x=[0], y=[0], color=[.5], color_continuous_scale="YlGnBu")
fig = fig.add_traces(
[
go.Scatter(
x=r["X"].explode(),
y=r["Y"].explode(),
name=str(r["Label"].values[0]),
line_color=sample_colorscale("YlGnBu", r["Label"].values[0])[0],
showlegend=False
)
for _, r in df.groupby(df.index)
]
)
fig

Related

Matplotlib add line to connect related scatter points

Is there a built-in way to add a line connecting scatter points with the same y-val?
Currently have this:
x1 = [6, 11, 7, 13, 6, 7.5]
x2 = [np.nan, np.nan, np.nan, np.nan, np.nan, 8.6]
y = [2, 10, 2, 14, 9, 10]
df = pd.DataFrame(data=zip(x1, x2, y), columns=["x1", "x2", "y"])
fig, ax = plt.subplots()
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.scatter(df["x1"], df["y"], c="k")
ax.scatter(df["x2"], df["y"], edgecolor="k", facecolors="none")
Want this:
You can pair the points (x1[i],y[i]) and (x2[i],y[i]) iteratively by using the following code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
x1 = [6, 11, 7, 13, 6, 7.5]
x2 = [np.nan, np.nan, np.nan, np.nan, np.nan, 8.6]
y = [2, 10, 2, 14, 9, 10]
df = pd.DataFrame(data=zip(x1, x2, y), columns=["x1", "x2", "y"])
fig, ax = plt.subplots()
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.scatter(df["x1"], df["y"], c="k")
ax.scatter(df["x2"], df["y"], edgecolor="k", facecolors="none")
for i in range(len(y)):
plt.plot([x1[i],x2[i]],[y[i],y[i]])
plt.show()
The output of this code gives:

Select appropriate colors in stacked Seaborn barplot

I want to create a stacked barplot using Seaborn with this MiltiIndex DataFrame
header = pd.MultiIndex.from_product([['#'],
['TE', 'SS', 'M', 'MR']])
dat = ([[100, 20, 21, 35], [100, 12, 5, 15]])
df = pd.DataFrame(dat, index=['JC', 'TTo'], columns=header)
df = df.stack()
df = df.sort_values('#', ascending=False).sort_index(level=0, sort_remaining=False)
The code I'm using for the plot is:
fontP = FontProperties()
fontP.set_size('medium')
colors = {'TE': 'green', 'SS': 'blue', 'M': 'yellow', 'MR': 'red'}
kwargs = {'alpha':0.5}
plt.figure(figsize=(12, 9))
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[0]], '#'],
color=colors[df2.index[0][1]], **kwargs)
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[1]], '#'],
color=colors[df2.index[1][1]], **kwargs)
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[2]], '#'],
color=colors[df2.index[2][1]], **kwargs)
bottom_plot = sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[3]], '#'],
color=colors[df2.index[3][1]], **kwargs)
bar1 = plt.Rectangle((0, 0), 1, 1, fc='green', edgecolor="None")
bar2 = plt.Rectangle((0, 0), 0, 0, fc='yellow', edgecolor="None")
bar3 = plt.Rectangle((0, 0), 2, 2, fc='red', edgecolor="None")
bar4 = plt.Rectangle((0, 0), 3, 3, fc='blue', edgecolor="None")
l = plt.legend([bar1, bar2, bar3, bar4], [
"TE", "M",
'MR', 'SS'
],
bbox_to_anchor=(0.95, 1),
loc='upper left',
prop=fontP)
l.draw_frame(False)
sns.despine()
bottom_plot.set_ylabel("#")
axes = plt.gca()
axes.yaxis.grid()
And I get:
My problem is the order of the colors in the second bar ('TTo'), I want the colors to be automatically selected based on the level 1 index value (['TE', 'SS', 'M', 'MR']) so that they are ordered correctly. Further down the one with the highest value with its corresponding color, in front the next one with the next highest value and its color and so on, as the first bar shows ('JC).
Maybe there is a simpler way to do this in Seaborn than the one I'm using...
I'm not sure how to create such a plot with seaborn. Here is a way to create it with a loop through the rows and adding one matplotlib bar at each step:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
sns.set()
header = pd.MultiIndex.from_product([['#'],
['TE', 'SS', 'M', 'MR']])
dat = ([[100, 20, 21, 35], [100, 12, 5, 15]])
df = pd.DataFrame(dat, index=['JC', 'TTo'], columns=header)
df = df.stack()
df = df.sort_values('#', ascending=False).sort_index(level=0, sort_remaining=False)
colors = {'TE': 'green', 'SS': 'blue', 'M': 'yellow', 'MR': 'red'}
prev_index0 = None
for (index0, index1), quantity in df.itertuples():
if index0 != prev_index0:
bottom = 0
plt.bar(index0, quantity, fc=colors[index1], ec='none', bottom=bottom, label=index1)
bottom += quantity
prev_index0 = index0
legend_handles = [plt.Rectangle((0, 0), 0, 0, color=colors[c], label=c) for c in colors]
plt.legend(handles=legend_handles)
plt.show()
To plot the bars back to front without stacking, the code can be simplified:
colors = {'TE': 'forestgreen', 'SS': 'cornflowerblue', 'M': 'gold', 'MR': 'crimson'}
for (index0, index1), quantity in df.itertuples():
plt.bar(index0, quantity, fc=colors[index1], ec='none', label=index1)
legend_handles = [plt.Rectangle((0, 0), 0, 0, color=colors[c], label=c, ec='black') for c in colors]
plt.legend(handles=legend_handles, bbox_to_anchor=(1.02, 1.02), loc='upper left')
plt.tight_layout()

matplotlib histogram with equal bars width

I use a histogram to display the distribution. Everything works fine if the spacing of the bins is uniform. But if the interval is different, then the bar width is appropriate (as expected). Is there a way to set the width of the bar independent of the size of the bins ?
This is what i have
This what i trying to draw
from matplotlib import pyplot as plt
my_bins = [10, 20, 30, 40, 50, 120]
my_data = [5, 5, 6, 8, 9, 15, 25, 27, 33, 45, 46, 48, 49, 111, 113]
fig1 = plt.figure()
ax1 = fig1.add_subplot(121)
ax1.set_xticks(my_bins)
ax1.hist(my_data, my_bins, histtype='bar', rwidth=0.9,)
fig1.show()
I cannot mark your question as a duplicate, but I think my answer to this question might be what you are looking for?
I'm not sure how you'll make sense of the result, but you can use numpy.histogram to calculate the height of your bars, then plot those directly against an arbitrary x-scale.
x = np.random.normal(loc=50, scale=200, size=(2000,))
bins = [0,1,10,20,30,40,50,75,100]
fig = plt.figure()
ax = fig.add_subplot(211)
ax.hist(x, bins=bins, edgecolor='k')
ax = fig.add_subplot(212)
h,e = np.histogram(x, bins=bins)
ax.bar(range(len(bins)-1),h, width=1, edgecolor='k')
EDIT Here's with the adjustment to the x-tick labels so that the correspondence is easier to see.
my_bins = [10, 20, 30, 40, 50, 120]
my_data = [5, 5, 6, 8, 9, 15, 25, 27, 33, 45, 46, 48, 49, 111, 113]
fig = plt.figure()
ax = fig.add_subplot(211)
ax.hist(my_data, bins=my_bins, edgecolor='k')
ax = fig.add_subplot(212)
h,e = np.histogram(my_data, bins=my_bins)
ax.bar(range(len(my_bins)-1),h, width=1, edgecolor='k')
ax.set_xticks(range(len(my_bins)-1))
ax.set_xticklabels(my_bins[:-1])

Trying to create a Seaborn heatmap from a Pandas Dataframe

This is first time trying this. I actually have a dict of lists I am generating in a program, but since this is my first time ever trying this, I am using a dummy dict just for testing.
I am following this:
python Making heatmap from DataFrame
but I am failing with the following:
Traceback (most recent call last):
File "C:/Users/Mark/PycharmProjects/main/main.py", line 20, in <module>
sns.heatmap(df, cmap='RdYlGn_r', linewidths=0.5, annot=True)
File "C:\Users\Mark\AppData\Roaming\Python\Python36\site-packages\seaborn\matrix.py", line 517, in heatmap
yticklabels, mask)
File "C:\Users\Mark\AppData\Roaming\Python\Python36\site-packages\seaborn\matrix.py", line 168, in __init__
cmap, center, robust)
File "C:\Users\Mark\AppData\Roaming\Python\Python36\site-packages\seaborn\matrix.py", line 205, in _determine_cmap_params
calc_data = plot_data.data[~np.isnan(plot_data.data)]
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
My code:
import pandas as pd
import seaborn as sns
Index = ['key1', 'key2', 'key3', 'key4', 'key5']
Cols = ['A', 'B', 'C', 'D']
testdict = {
"key1": [1, 2, 3, 4],
"key2": [5, 6, 7, 8],
"key3": [9, 10, 11, 12],
"key4": [13, 14, 15, 16],
"key5": [17, 18, 19, 20]
}
df = pd.DataFrame(testdict, index=Index, columns=Cols)
df = df.transpose()
sns.heatmap(df, cmap='RdYlGn_r', linewidths=0.5, annot=True)
You need to switch your column and index labels
Cols = ['key1', 'key2', 'key3', 'key4', 'key5']
Index = ['A', 'B', 'C', 'D']

matplotlib advanced stacked bar

matplotlib plot bars
It can be regular like http://matplotlib.org/examples/api/barchart_demo.html
Let's define this as [M, F]
It can be stacked like http://matplotlib.org/examples/pylab_examples/bar_stacked.html
Let's define this as [M + F]
Now how to plot [M, F + other]
If I understand you correctly, you want to have a stack plot with more than two elements stacked? If yes, that goes pretty straight forward as in the example you posted:
#!/usr/bin/env python
# a stacked bar plot with errorbars
import numpy as np
import matplotlib.pyplot as plt
N = 5
menMeans = [20, 35, 30, 35, 27]
womenMeans = [25, 32, 34, 20, 25]
otherMeans = [5, 2, 4, 8, 5]
menStd = [2, 3, 4, 1, 2]
womenStd = [3, 5, 2, 3, 3]
otherStd = [1, 1, 1, 1, 1]
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars: can also be len(x) sequence
p1 = plt.bar(ind, menMeans, width, color='r', yerr=womenStd)
p2 = plt.bar(ind, womenMeans, width, color='y',
bottom=menMeans, yerr=menStd)
p3 = plt.bar(ind, otherMeans, width, color='b',
bottom=[menMeans[j] + womenMeans[j] for j in range(len(menMeans)) ],
yerr=otherStd)
plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.xticks(ind+width/2., ('G1', 'G2', 'G3', 'G4', 'G5') )
plt.yticks(np.arange(0,81,10))
plt.legend( (p1[0], p2[0], p3[0]), ('Men', 'Women', 'Other') )
plt.show()