Multicolored graph based on data frame values - pandas

Im plotting chart based on the data frame as below., I want to show the graph line in different colour based on the column Condition. Im trying the following code but it shows only one colour throughout the graph.
df = pd.DataFrame(dict(
Day=pd.date_range('2018-01-01', periods = 60, freq='D'),
Utilisation = np.random.rand(60) * 100))
df = df.astype(dtype= {"Utilisation":"int64"})
df['Condition'] = np.where(df.Utilisation < 10, 'Winter',
np.where(df.Utilisation < 30, 'Summer', 'Spring'))
condition_map = {'Winter': 'r', 'Summer': 'k', 'Spring': 'b'}
df[['Utilisation','Day']].set_index('Day').plot(figsize=(10,4), rot=90,
color=df.Condition.map(condition_map))

So, I assume you want a graph for each condition.
I would use groupby to separate the data.
# Color setting
season_color = {'Winter': 'r', 'Summer': 'k', 'Spring': 'b'}
# Create figure and axes
f, ax = plt.subplots(figsize = (10, 4))
# Loop over and plot each group of data
for cond, data in df.groupby('Condition'):
ax.plot(data.Day, data.Utilisation, color = season_color[cond], label = cond)
# Fix datelabels
f.autofmt_xdate()
f.legend()
f.show()
If you truly want the date ticks to be rotated 90 degrees, use autofmt_xdate(rotation = 90)
Update:
If you want to plot everything in a single line it's a bit trickier since a line only can have one color associated to it.
You could plot a line between each point and split a line if it crosses a "color boundary", or check out this pyplot example: multicolored line
Another possibility is to plot a lot of scatter points between each point and create a own colormap that represents your color boundaries.
To create a colormap (and norm) I use from_levels_and_colors
import matplotlib.colors
colors = ['#00BEC5', '#a0c483', '#F9746A']
boundaries = [0, 10, 30, 100]
cm, nrm = matplotlib.colors.from_levels_and_colors(boundaries, colors)
To connect each point with next you could shift the dataframe, but here I just zip the original df with a sliced version
from itertools import islice
f, ax = plt.subplots(figsize = (10,4))
for (i,d0), (i,d1) in zip(df.iterrows(), islice(df.iterrows(), 1, None)):
d_range = pd.date_range(d0.Day, d1.Day, freq = 'h')
y_val = np.linspace(d0.Utilisation, d1.Utilisation, d_range.size)
ax.scatter(d_range, y_val, c = y_val, cmap = cm, norm = nrm)
f.autofmt_xdate()
f.show()

Related

Matplotlib: how to automatically draw an axes title at the left-most position?

I'm drawing my axes title with the method ax.set_title("Horizontal Bars", ha="left", x=0, fontsize=16) and it draw as below:
How do I draw it in the left-most position, as the "title here" in red above? I know I can use a negative value for x, but I'd like to find this value automatically.
To dynamically generate the bounds you would do:
import matplotlib.pyplot as plt
import numpy as np
# Fixing random state for reproducibility
np.random.seed(19680801)
plt.rcdefaults()
fig, ax = plt.subplots()
# Example data
people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
y_pos = np.arange(len(people))
performance = 3 + 10 * np.random.rand(len(people))
error = np.random.rand(len(people))
ax.barh(y_pos, performance, xerr=error, align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis() # labels read top-to-bottom
ax.set_xlabel('Performance')
# Get min x and max y
# get the inverse of the transformation from data coordinates to pixels
transf = ax.transData.inverted()
bb = plt.figure().get_window_extent(renderer = plt.figure().canvas.get_renderer())
bb_datacoords = bb.transformed(transf)
points = bb_datacoords.get_points()
x_lim = points[0][0]
y_lim = points[1][1]
ax.text(x=x_lim, y=y_lim, s="Horizontal Bars", weight="bold", fontsize=16) # <- Use text instead of title
which gives you an output of:

How to create a heatmap with marginal histograms, similar to a jointplot?

I want to plot 2-dimensional scalar data, which I would usually plot using matplotlib.pyplot.imshow or sns.heatmap. Consider this example:
data = [[10, 20, 30], [50, 50, 100], [80, 60, 10]]
fix, ax = plt.subplots()
ax.imshow(data, cmap=plt.cm.YlGn)
Now I additionally would like to have one-dimonsional bar plots at the top and the right side, showing the sum of the values in each column / row - just as sns.jointplot does. However, sns.jointplot seems only to work with categorical data, producing histograms (with kind='hist'), scatterplots or the like - I don't see how to use it if I want to specify the values of the cells directly. Is such a thing possible with seaborn?
The y axis in my plot is going to be days (within a month), the x axis is going to be hours. My data looks like this:
The field Cost Difference is what should make up the shade of the respective field in the plot.
Here is an approach that first creates a dummy jointplot and then uses its axes to add a heatmap and bar plots of the sums.
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
D = 28
H = 24
df = pd.DataFrame({'day': np.repeat(range(1, D + 1), H),
'hour': np.tile(range(H), D),
'Cost Dif.': np.random.uniform(10, 1000, D * H)})
# change the random df to have some rows/columns stand out (debugging, checking)
df.loc[df['hour'] == 10, 'Cost Dif.'] = 150
df.loc[df['hour'] == 12, 'Cost Dif.'] = 250
df.loc[df['day'] == 20, 'Cost Dif.'] = 800
g = sns.jointplot(data=df, x='day', y='hour', kind='hist', bins=(D, H))
g.ax_marg_y.cla()
g.ax_marg_x.cla()
sns.heatmap(data=df['Cost Dif.'].to_numpy().reshape(D, H).T, ax=g.ax_joint, cbar=False, cmap='Blues')
g.ax_marg_y.barh(np.arange(0.5, H), df.groupby(['hour'])['Cost Dif.'].sum().to_numpy(), color='navy')
g.ax_marg_x.bar(np.arange(0.5, D), df.groupby(['day'])['Cost Dif.'].sum().to_numpy(), color='navy')
g.ax_joint.set_xticks(np.arange(0.5, D))
g.ax_joint.set_xticklabels(range(1, D + 1), rotation=0)
g.ax_joint.set_yticks(np.arange(0.5, H))
g.ax_joint.set_yticklabels(range(H), rotation=0)
# remove ticks between heatmao and histograms
g.ax_marg_x.tick_params(axis='x', bottom=False, labelbottom=False)
g.ax_marg_y.tick_params(axis='y', left=False, labelleft=False)
# remove ticks showing the heights of the histograms
g.ax_marg_x.tick_params(axis='y', left=False, labelleft=False)
g.ax_marg_y.tick_params(axis='x', bottom=False, labelbottom=False)
g.fig.set_size_inches(20, 8) # jointplot creates its own figure, the size can only be changed afterwards
# g.fig.subplots_adjust(hspace=0.3) # optionally more space for the tick labels
g.fig.subplots_adjust(hspace=0.05, wspace=0.02) # less spaced needed when there are no tick labels
plt.show()

Tick labels in between colors (discrete colorer)

Hi I want to put the ticklabels between colors (center of the intervals), and the figure is plotted by discrete colors. But the min value is not 0. How can I write the code to do that?
I used following code to do that, but what I got is wrong...
n_clusters = len(cbar_tick_label)
tick_locs = (np.arange(n_clusters)+0.5)*(n_clusters-1)/(n_clusters)
cbar.set_ticks(tick_locs)
cbar.set_ticklabels(cbar_tick_label)
This code is from question: Discrete Color Bar with Tick labels in between colors. But it does not work when the min value of data is not zero.
Thanks!
Suppose there are N (e.g. 6) clusters. If you subdivide the range from the lowest number (e.g. 5) to the highest number (e.g. 10) into N equal parts, there will be a tick at every border between color cells. Subdividing into 2*N+1 equal parts, will also have a tick in the center of each color cell. Now, skipping every other of these 2*N+1 ticks will leave us with only the cell centers. So, np.linspace(5, 10, 6*2+1) are the ticks for borders and centers; taking np.linspace(5, 10, 6*2+1)[1::2] will be only the centers.
import numpy as np
import matplotlib.pyplot as plt
x, y = np.random.rand(2, 100)
c = np.random.randint(5, 11, x.shape)
n_clusters = c.max() - c.min() + 1
fig, ax = plt.subplots()
cmap = plt.get_cmap('inferno_r', n_clusters)
scat = ax.scatter(x, y, c=c, cmap=cmap)
cbar = plt.colorbar(scat)
tick_locs = np.linspace(c.min(), c.max(), 2 * n_clusters + 1)[1::2]
cbar_tick_label = np.arange(c.min(), c.max() + 1)
cbar.set_ticks(tick_locs)
cbar.set_ticklabels(cbar_tick_label)
plt.show()

Create multiple columns and rows of unequal size in matplotlib

I need to create multiple columns and rows of unequal size in matplotlib. Here is a sample code:
a = np.random.rand(20, 20)
b = np.random.rand(20, 5)
c = np.random.rand(5, 20)
d = np.random.rand(5,5)
arrays = [a,b,c,d]
fig, axs = plt.subplots(2, 2, sharex='col', sharey= 'row', figsize=(10,10))
for ax, ar in zip(axs.flatten(), arrays):
ax.imshow(ar)
However, I get this as a result.
The right column has images of unequal width for the first and second row, and I would want them to be equal (basically shrink the bottom right image to have the same scale as other images).
I had researched this a fair amount, but nothing seems to work. I had tried tight_layout(), some other formatting tricks, all to no avail...
You can use the gridspec's height_ratios and width_ratios argument to set the desired proportion the subplots shall occupy.
In this case, due to the symmetry, this is simply the shape of e.g. b.
import numpy as np
import matplotlib.pyplot as plt
a = np.random.rand(20, 20)
b = np.random.rand(20, 5)
c = np.random.rand(5, 20)
d = np.random.rand(5,5)
arrays = [a,b,c,d]
fig, axs = plt.subplots(2, 2, sharex='col', sharey= 'row', figsize=(10,10),
gridspec_kw={"height_ratios" : b.shape,
"width_ratios" : b.shape})
for ax, ar in zip(axs.flatten(), arrays):
ax.imshow(ar)
plt.show()
Or, more generally,
gridspec_kw={"height_ratios" : [a.shape[0], c.shape[0]],
"width_ratios" : [a.shape[1], b.shape[1]]}

How to color bars who make up 50% of the data?

I am plotting a histogram for some data points with bar heights being the percentage of that bin from the whole data:
x = normal(size=1000)
hist, bins = np.histogram(x, bins=20)
plt.bar(bins[:-1], hist.astype(np.float32) / hist.sum(), width=(bins[1]-bins[0]), alpha=0.6)
The result is:
I would like all bars that sum up to be 50% of the data to be in a different color, for example:
(I selected the colored bars without actually checking whether their sum adds to 50%)
Any suggestions how to accomplish this?
Here is how you can plot the first half of the bins with a different color, this looks like your mock, but I am not sure it complies to %50 of the data (it is not clear to me what do you mean by that).
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
mu, sigma = 100, 15
x = mu + sigma * np.random.randn(10000)
fig = plt.figure()
ax = fig.add_subplot(111)
# the histogram of the data
n, bins, patches = ax.hist(x, 50, normed=1, facecolor='green', alpha=0.75)
# now that we found the index we color all the beans smaller than middle index
for p in patches[:len(bins)/2]:
p.set_facecolor('red')
# hist uses np.histogram under the hood to create 'n' and 'bins'.
# np.histogram returns the bin edges, so there will be 50 probability
# density values in n, 51 bin edges in bins and 50 patches. To get
# everything lined up, we'll compute the bin centers
bincenters = 0.5*(bins[1:]+bins[:-1])
# add a 'best fit' line for the normal PDF
y = mlab.normpdf( bincenters, mu, sigma)
l = ax.plot(bincenters, y, 'r--', linewidth=1)
ax.set_xlabel('Smarts')
ax.set_ylabel('Probability')
ax.set_xlim(40, 160)
ax.set_ylim(0, 0.03)
ax.grid(True)
plt.show()
And the output is:
update
The key method you want to look at is patch.set_set_facecolor. You have to understand that almost everything you plot inside the axes object is a Patch, and as such it has this method, here is another example, I arbitrary choose the first 3 bars to have another color, you can choose based on what ever you decide:
import numpy as np
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
## the data
N = 5
menMeans = [18, 35, 30, 35, 27]
## necessary variables
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars
## the bars
rects1 = ax.bar(ind, menMeans, width,
color='black',
error_kw=dict(elinewidth=2,ecolor='red'))
for patch in rects1.patches[:3]:
patch.set_facecolor('red')
ax.set_xlim(-width,len(ind)+width)
ax.set_ylim(0,45)
ax.set_ylabel('Scores')
xTickMarks = ['Group'+str(i) for i in range(1,6)]
ax.set_xticks(ind)
xtickNames = ax.set_xticklabels(xTickMarks)
plt.setp(xtickNames, rotation=45, fontsize=10)
plt.show()